Path: blob/master/arch/x86/crypto/aes-x86_64-asm_64.S
10817 views
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <[email protected]>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */

/*
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor
 * (the macros below build register names with `##` token pasting).
 *
 * Both entry points take their arguments in %rdi, %rsi and %rdx:
 *   %rdi - base pointer for the round-key reads and the read at offset 480
 *   %rsi - destination of the four 32-bit result words
 *   %rdx - source of the four 32-bit input words
 *
 * %rbx and %rbp are used as scratch, but they are copied to %r8/%r9 on
 * entry and copied back before `ret`.  %rax, %rcx, %rdx, %rsi, %rdi and
 * %r8-%r11 are overwritten.  No stack space is allocated.
 */

.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab

.text

#include <asm/asm-offsets.h>

/*
 * Register aliases.  Rn is the 64-bit register; the suffixes E, X, H and L
 * name its 32-bit, 16-bit, high-byte (bits 8-15) and low-byte views.  Only
 * R1-R4 have X/H/L views, which is why the round macro keeps the state in
 * those four registers.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%rbp
#define R7E	%ebp
#define R8	%r8
#define R9	%r9
#define R10	%r10
#define R11	%r11

/*
 * prologue - define the global symbol FUNC and emit its set-up code.
 *
 *   FUNC         symbol to define (global, @function, aligned to 8)
 *   KEY          byte offset from r8 at which the round keys start
 *   B128, B192   labels taken when the word at 480(r8) is below 24 /
 *                equal to 24; otherwise execution falls through
 *   r1 -> r2     register copy made first (entry() uses it to park %rbx)
 *   r3 -> r4     register copy (entry() uses it to park %rbp)
 *   r7           input pointer; overwritten by the last of the four loads
 *   r5,r1,r6,r7  receive input words 0..3, each XORed with the matching
 *                word at r8 + KEY (addressed as -48..-36 from r9)
 *   r8           base pointer for the round keys and the 480 offset
 *   r9           set to r8 + KEY + 48, then advanced by 32 on the B192
 *                path and by 64 on the fall-through path
 *   r10 -> r11   register copy (entry() uses it to keep the output
 *                pointer); r10 is then reused for the word at 480(r8)
 *
 * `leaq` leaves the flags alone, so the `je` after the first `leaq` still
 * tests the result of the `cmpl`.
 *
 * NOTE(review): 480(r8) is presumably the AES key length in bytes
 * (16/24/32), with KEY selecting the encryption (0) or decryption (240)
 * key schedule - confirm against the context structure used by the caller.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
	.global	FUNC; \
	.type	FUNC,@function; \
	.align	8; \
FUNC:	movq	r1,r2; \
	movq	r3,r4; \
	leaq	KEY+48(r8),r9; \
	movq	r10,r11; \
	movl	(r7),r5 ## E; \
	movl	4(r7),r1 ## E; \
	movl	8(r7),r6 ## E; \
	movl	12(r7),r7 ## E; \
	movl	480(r8),r10 ## E; \
	xorl	-48(r9),r5 ## E; \
	xorl	-44(r9),r1 ## E; \
	xorl	-40(r9),r6 ## E; \
	xorl	-36(r9),r7 ## E; \
	cmpl	$24,r10 ## E; \
	jb	B128; \
	leaq	32(r9),r9; \
	je	B192; \
	leaq	32(r9),r9;

/*
 * epilogue - undo the register copies made by the prologue, store the
 * result and return.
 *
 *   r1 -> r2, r3 -> r4   register copies (restore the parked registers)
 *   r5, r6, r7, r8       32-bit result words stored at 0, 4, 8, 12 from r9
 *   r9                   output pointer
 */
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	movq	r1,r2; \
	movq	r3,r4; \
	movl	r5 ## E,(r9); \
	movl	r6 ## E,4(r9); \
	movl	r7 ## E,8(r9); \
	movl	r8 ## E,12(r9); \
	ret;

/*
 * round - one table-driven round.
 *
 *   TAB          base symbol of four consecutive lookup tables, addressed
 *                at TAB, TAB+1024, TAB+2048 and TAB+3072 with a byte index
 *                scaled by 4
 *   OFFSET       byte offset from r8 of the four round-key words
 *   r1..r4       the four 32-bit state words on entry (must have X/H/L
 *                views); all four are overwritten
 *   r5, r6       together with r3 and r4 hold the four result words on
 *                exit; each is the XOR of one entry from each table
 *   r7           scratch index register
 *   r8           round-key base pointer
 *   ra..rd       registers that get the round-key words at OFFSET,
 *                OFFSET+4, OFFSET+8 and OFFSET+12 XORed in; every use in
 *                this file passes the same registers as r5, r6, r3, r4
 *
 * Only bits 0-15 of a register are reachable through the L/H byte views,
 * so the upper two bytes are brought down with `shrl $16`.  The
 * `movw`/`roll $16` pairs reuse a register whose low half has already been
 * consumed: they leave its old upper half in bits 0-15 and the low half of
 * another state word in bits 16-31.
 *
 * The instruction order is interleaved on purpose; do not reorder without
 * re-checking every register's live range.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E; \
	movzbl	r2 ## L,r6 ## E; \
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X; \
	movl	TAB(,r6,4),r6 ## E; \
	roll	$16,r2 ## E; \
	shrl	$16,r4 ## E; \
	movzbl	r4 ## H,r7 ## E; \
	movzbl	r4 ## L,r4 ## E; \
	xorl	OFFSET(r8),ra ## E; \
	xorl	OFFSET+4(r8),rb ## E; \
	xorl	TAB+3072(,r7,4),r5 ## E;\
	xorl	TAB+2048(,r4,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E; \
	movzbl	r1 ## H,r4 ## E; \
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X; \
	roll	$16,r1 ## E; \
	shrl	$16,r3 ## E; \
	xorl	TAB(,r7,4),r5 ## E; \
	movzbl	r3 ## H,r7 ## E; \
	movzbl	r3 ## L,r3 ## E; \
	xorl	TAB+3072(,r7,4),r4 ## E;\
	xorl	TAB+2048(,r3,4),r5 ## E;\
	movzbl	r1 ## H,r7 ## E; \
	movzbl	r1 ## L,r3 ## E; \
	shrl	$16,r1 ## E; \
	xorl	TAB+3072(,r7,4),r6 ## E;\
	movl	TAB+2048(,r3,4),r3 ## E;\
	movzbl	r1 ## H,r7 ## E; \
	movzbl	r1 ## L,r1 ## E; \
	xorl	TAB+1024(,r7,4),r6 ## E;\
	xorl	TAB(,r1,4),r3 ## E; \
	movzbl	r2 ## H,r1 ## E; \
	movzbl	r2 ## L,r7 ## E; \
	shrl	$16,r2 ## E; \
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E; \
	movzbl	r2 ## L,r2 ## E; \
	xorl	OFFSET+8(r8),rc ## E; \
	xorl	OFFSET+12(r8),rd ## E; \
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;

/*
 * move_regs - copy the 32-bit views r3 -> r1 and r4 -> r2.  Used after a
 * round to put the two results left in R5/R6 back into R1/R2, where the
 * next round expects its state.
 */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E; \
	movl	r4 ## E,r2 ## E;

/*
 * entry - prologue with this file's fixed register assignment:
 * %rbx parked in %r8, %rbp parked in %r9, key base read from %rdi,
 * round-key pointer in %r10, input pointer in %rdx, output pointer moved
 * from %rsi to %r11, state loaded into %eax/%ebx/%ecx/%edx.
 */
#define entry(FUNC,KEY,B128,B192) \
	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

/*
 * return - matching epilogue: restores %rbx from %r8 and %rbp from %r9,
 * stores %esi, %edi, %ecx, %edx to the output pointer in %r11 and returns.
 */
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)

/*
 * encrypt_round / decrypt_round - one round followed by move_regs so the
 * state is back in R1..R4 for the next round.  The decrypt variants swap
 * the roles R1<->R2, R3<->R4 and R5<->R6 inside round().
 *
 * encrypt_final / decrypt_final - the same round without move_regs; the
 * result is left in R5, R6, R3, R4, which is where `return` reads it.
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)

/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
/*
 * Falling through from entry() runs 13 rounds plus the final one; entering
 * at enc192 runs 11 plus the final one; entering at enc128 runs 9 plus the
 * final one.  Rounds use crypto_ft_tab, the final round crypto_fl_tab.
 *
 * NOTE(review): the code reads the round keys at offset 0 and a length word
 * at offset 480 directly from the first argument, so that argument looks
 * like a pointer to the AES key context itself rather than to a
 * struct crypto_tfm - confirm against the C prototype in the glue code.
 */

entry(aes_enc_blk,0,enc128,enc192)
	encrypt_round(crypto_ft_tab,-96)
	encrypt_round(crypto_ft_tab,-80)
enc192:	encrypt_round(crypto_ft_tab,-64)
	encrypt_round(crypto_ft_tab,-48)
enc128:	encrypt_round(crypto_ft_tab,-32)
	encrypt_round(crypto_ft_tab,-16)
	encrypt_round(crypto_ft_tab,  0)
	encrypt_round(crypto_ft_tab, 16)
	encrypt_round(crypto_ft_tab, 32)
	encrypt_round(crypto_ft_tab, 48)
	encrypt_round(crypto_ft_tab, 64)
	encrypt_round(crypto_ft_tab, 80)
	encrypt_round(crypto_ft_tab, 96)
	encrypt_final(crypto_fl_tab,112)
	return

/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
/*
 * Same structure as aes_enc_blk, with the round keys taken from offset 240
 * of the first argument, crypto_it_tab for the rounds and crypto_il_tab for
 * the final round.
 *
 * NOTE(review): as for aes_enc_blk, the first argument is dereferenced
 * directly at offsets 240 and 480 - confirm its real type with the caller.
 */

entry(aes_dec_blk,240,dec128,dec192)
	decrypt_round(crypto_it_tab,-96)
	decrypt_round(crypto_it_tab,-80)
dec192:	decrypt_round(crypto_it_tab,-64)
	decrypt_round(crypto_it_tab,-48)
dec128:	decrypt_round(crypto_it_tab,-32)
	decrypt_round(crypto_it_tab,-16)
	decrypt_round(crypto_it_tab,  0)
	decrypt_round(crypto_it_tab, 16)
	decrypt_round(crypto_it_tab, 32)
	decrypt_round(crypto_it_tab, 48)
	decrypt_round(crypto_it_tab, 64)
	decrypt_round(crypto_it_tab, 80)
	decrypt_round(crypto_it_tab, 96)
	decrypt_final(crypto_il_tab,112)
	return