Path: blob/master/arch/x86/crypto/twofish-i586-asm_32.S
170899 views
/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <[email protected]>              *
*                                                                         *
***************************************************************************/

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Twofish block encryption and decryption for 32-bit x86.
 * AT&T syntax, passed through the C preprocessor (the round code is built
 * from function-like #define macros that use ## token pasting).
 *
 * Both entry points read three pointer arguments from the stack.  The
 * offsets below are relative to %esp at function entry; the functions push
 * four registers first and therefore add 16 when they load the arguments.
 */

/* return address at 0 */

#define in_blk    12  /* input byte array address parameter*/
#define out_blk   8  /* output byte array address parameter*/
#define ctx       4  /* Twofish context structure */

/* Byte offsets of the four 32-bit words a, b, c, d inside a 16-byte block */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/*
 * Structure of the crypto context struct (byte offsets from the ctx
 * pointer, which is kept in %ebp while a block is processed).
 * s0 sits at offset 0, so the round macros address it as (%ebp,%edi,4)
 * without a displacement.
 */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */

/*
 * define a few register aliases to allow macro substitution:
 * Rn ## D is the 32-bit register, Rn ## B its low byte and Rn ## H the
 * second byte, so a round macro can be handed just "R0".."R3".
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh


/* performs input whitening: src ^= whitening word at w + offset */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/* performs output whitening: src ^= whitening word at w + 16 + offset */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two key words inside k
 * operations on a and b are interleaved to increase performance
 *
 * Scratch: %edi (table index, later the saved d), %esi (accumulator for
 * the a lookups) and one stack slot (saved d).  %ebp must hold the ctx
 * address.
 *
 * On exit the registers are prepared for the next round, which is invoked
 * with the pairs swapped (c,d,a,b): c is left rotated by 16, b is left as
 * rol $1, and a is no longer rotated.
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;				/* save d, its register collects the b lookups */\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;		/* s1[byte 0 of b] */\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;		/* s2[byte 2 of a] (a is rotated 16) */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* expose the upper two bytes of b */\
	xor	s2(%ebp,%edi,4),d ## D;		/* s2[byte 1 of b] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* a is now unrotated */\
	xor	s3(%ebp,%edi,4),%esi;		/* s3[byte 3 of a] */\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;		/* s3[byte 2 of b] */\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;		/* s0[byte 0 of a] */\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;		/* 16 + 15 right = rol 1 in total */\
	xor	(%ebp,%edi,4),	d ## D;		/* s0[byte 3 of b] */\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* s1[byte 1 of a] */\
	pop	%edi;				/* edi = saved d */\
	add	d ## D,		%esi;		/* esi = t0 + t1 */\
	add	%esi,		d ## D;		/* d   = t0 + 2*t1 */\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;		/* ror 1 combined with rol 16 */\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two key words inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * b is rotated by 16 twice (left unchanged) and c only gets ror $1, so no
 * register is left pre-rotated for a following round.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two key words inside k
 * operations on a and b are interleaved to increase performance
 *
 * Scratch: %edi (table index, later the saved c), %esi (accumulator for
 * the b lookups) and one stack slot (saved c).  %ebp must hold the ctx
 * address.
 *
 * On exit the registers are prepared for the next round, which is invoked
 * with the pairs swapped (c,d,a,b): a is left as rol $1, d is left rotated
 * by 16, and b is no longer rotated.
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;				/* save c, its register collects the a lookups */\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;		/* s0[byte 0 of a] */\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;		/* s3[byte 2 of b] (b is rotated 16) */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* expose the upper two bytes of a */\
	xor	s1(%ebp,%edi,4),c ## D;		/* s1[byte 1 of a] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* b is now unrotated */\
	xor	(%ebp,%edi,4),	%esi;		/* s0[byte 3 of b] */\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;		/* s2[byte 2 of a] */\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* s1[byte 0 of b] */\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;		/* 16 + 15 right = rol 1 in total */\
	xor	s3(%ebp,%edi,4),c ## D;		/* s3[byte 3 of a] */\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;		/* s2[byte 1 of b] */\
	pop	%edi;				/* edi = saved c */\
	add	%esi,		c ## D;		/* c   = t0 + t1 */\
	add	c ## D,		%esi;		/* esi = t0 + 2*t1 */\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;		/* ror 1 combined with rol 16 */

/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two key words inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * a is rotated by 16 twice (left unchanged) and d only gets ror $1, so no
 * register is left pre-rotated for a following round.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/*
 * twofish_enc_blk - encrypt one 16-byte block.
 *
 * Stack arguments (see the defines at the top of the file):
 *   ctx     address of the Twofish context (S-box tables, whitening keys,
 *           round keys)
 *   out_blk address the 16 output bytes are written to
 *   in_blk  address the 16 input bytes are read from
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are overwritten.
 */
SYM_FUNC_START(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi

	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	/* load a,b,c,d and apply the first four whitening words */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax		/* encrypt_round expects a rotated 16 */
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx		/* encrypt_round expects d already rol $1 */

	/* 16 rounds; the register pairs swap roles every round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/*
	 * %eax/%ebx are whitened and stored as output words c/d, and
	 * %ecx/%edx as output words a/b.
	 */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_enc_blk)

/*
 * twofish_dec_blk - decrypt one 16-byte block.
 *
 * Stack arguments (see the defines at the top of the file):
 *   ctx     address of the Twofish context (S-box tables, whitening keys,
 *           round keys)
 *   out_blk address the 16 output bytes are written to
 *   in_blk  address the 16 input bytes are read from
 *
 * Mirrors twofish_enc_blk: the whitening macros are applied in the
 * opposite order and the round keys are used from offset 15*8 down to 0.
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are overwritten.
 */
SYM_FUNC_START(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi


	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	/* load a,b,c,d and remove the second four whitening words */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx		/* decrypt_round expects b rotated 16 */
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx		/* decrypt_round expects c already rol $1 */

	/* 16 rounds in reverse key order; the register pairs swap every round */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/*
	 * %eax/%ebx are whitened and stored as output words c/d, and
	 * %ecx/%edx as output words a/b.
	 */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_dec_blk)