// Path: blob/master/arch/x86/crypto/aes-i586-asm_32.S
// 10817 views
// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer//
//
//   2. distributions in binary form include the above copyright
//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials//
//
//   3. the copyright holder's name is not used to endorse products
//      built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>

// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

// Syntax: GNU as, AT&T operand order (source first, destination last),
// 32-bit x86.  The file is passed through the C preprocessor first, which
// is what expands the #define macros below.

.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>

#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)

/*
 * Offsets to parameters with one register pushed onto stack, i.e. right
 * after the initial "push %ebp": 0(%esp) is the saved ebp, 4(%esp) the
 * return address, and the three arguments follow.  Every further push
 * moves these by 4, which is why the code below uses in_blk+4 (after one
 * more push) and out_blk+12 (with three more registers still pushed).
 */
#define ctx     8
#define out_blk 12
#define in_blk  16

/*
 * Offsets in crypto_aes_ctx structure.
 * NOTE(review): these are hard-coded here; presumably they must match the
 * C definition of struct crypto_aes_ctx - confirm against the header.
 */
#define klen (480)
#define ekey (0)
#define dkey (240)

// register mapping for encrypt and decrypt subroutines

#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi

// Byte sub-register names.  Only eax, ebx, ecx and edx (r0..r3) have
// entries, so only those can be passed to l() / h().

#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh

// h(reg) / l(reg) give the high / low byte register of reg.  The extra
// level of indirection lets rN expand to its register name before the
// token paste: l(r0) -> _l(eax) -> eaxl -> al.

#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)

// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.

// Parameters:
//   table   table base address (four consecutive sub-tables, tlen bytes each)
//   a1      out_state[0], xored with sub-table 0 indexed by byte 0 of idx
//   a2      out_state[1], xored with sub-table 1 indexed by byte 1 of idx
//   a3      out_state[2], xored with sub-table 2 indexed by byte 2 of idx
//   a4      out_state[3], xored with sub-table 3 indexed by byte 3 of idx
//   idx     input register for the round (destroyed)
//   tmp     scratch register for the round
// (The key schedule operand "sched" is taken only by do_fcol and do_icol.)

#define do_col(table, a1,a2,a3,a4, idx, tmp)    \
        movzx   %l(idx),%tmp;                   \
        xor     table(,%tmp,4),%a1;             \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+2*tlen(,%tmp,4),%a3;      \
        xor     table+3*tlen(,%idx,4),%a4;

// Forward-cipher first column of a round: as do_col, but the outputs are
// first initialised from the key schedule instead of being accumulated.
// a1, a2, a3, a4 are loaded from sched offsets 0, 12, 8, 4 respectively.
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
        mov     0 sched,%a1;                    \
        movzx   %l(idx),%tmp;                   \
        mov     12 sched,%a2;                   \
        xor     table(,%tmp,4),%a1;             \
        mov     4 sched,%a4;                    \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+3*tlen(,%idx,4),%a4;      \
        mov     %a3,%idx;                       \
        mov     8 sched,%a3;                    \
        xor     table+2*tlen(,%tmp,4),%a3;

// Inverse-cipher first column of a round: identical to do_fcol except that
// a2 is loaded from sched offset 4 and a4 from sched offset 12.
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
        mov     0 sched,%a1;                    \
        movzx   %l(idx),%tmp;                   \
        mov     4 sched,%a2;                    \
        xor     table(,%tmp,4),%a1;             \
        mov     12 sched,%a4;                   \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+3*tlen(,%idx,4),%a4;      \
        mov     %a3,%idx;                       \
        mov     8 sched,%a3;                    \
        xor     table+2*tlen(,%tmp,4),%a3;


// original Gladman had conditional saves to MMX regs.
// save(slot, reg) stores reg into 32-bit stack slot number slot;
// restore(reg, slot) loads reg back from stack slot number slot.
// Both address the slots relative to the current %esp.
#define save(a1, a2)            \
        mov     %a2,4*a1(%esp)

#define restore(a1, a2)         \
        mov     4*a2(%esp),%a1

// These macros perform a forward encryption cycle.  A fwd_rnd1 followed
// by a fwd_rnd2 is entered with the previous round column values in
// r0,r1,r4,r5 and exits with the final values in the same registers,
// using stack for temporary storage.
//
// r1 and r5 are saved first because do_fcol overwrites them as outputs
// before they have been consumed as inputs; they are reloaded into the
// index register by the restore() calls.  The column named in each
// "idx=" remark is the input column held in the index register at that
// step (do_fcol leaves the old a3, i.e. r4, in idx on exit).

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */

// These macros perform an inverse encryption cycle.  An inv_rnd1 followed
// by an inv_rnd2 is entered with the previous round column values in
// r0,r1,r4,r5 and exits with the final values in the same registers,
// using stack for temporary storage.  The structure mirrors the forward
// macros, with do_icol in place of do_fcol and a different assignment of
// output registers to each column.

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table)                                            \
        save   (0,r1);                                                  \
        save   (1,r5);                                                  \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */

// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// In:    all three arguments are read from the stack (see ctx / out_blk /
//        in_blk offsets above).
// Out:   16 bytes stored at out_blk.
// Saved: ebp, ebx, esi, edi are pushed on entry and popped before ret.
// Clobb: eax, ecx, edx, flags.
// Stack: 4 saved registers plus 8 bytes of scratch used by save/restore.

.global aes_enc_blk

.extern crypto_ft_tab
.extern crypto_fl_tab

.align 4

aes_enc_blk:
        push    %ebp
        mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: two registers pushed so far
        push    %esi
        mov     klen(%ebp),%r3          // key size
        push    %edi
#if ekey != 0
        lea     ekey(%ebp),%ebp         // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        add     $16,%ebp                // increment to next round key
        cmp     $24,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp           // lea leaves the cmp flags intact for je
        je      3f                      // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      fwd_rnd1( -64(%ebp), crypto_ft_tab)     // 14 rounds for 256-bit key
        fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:      fwd_rnd1( -32(%ebp), crypto_ft_tab)     // 12 rounds for 192-bit key
        fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:      fwd_rnd1(    (%ebp), crypto_ft_tab)     // 10 rounds for 128-bit key
        fwd_rnd2( +16(%ebp), crypto_ft_tab)
        fwd_rnd1( +32(%ebp), crypto_ft_tab)
        fwd_rnd2( +48(%ebp), crypto_ft_tab)
        fwd_rnd1( +64(%ebp), crypto_ft_tab)
        fwd_rnd2( +80(%ebp), crypto_ft_tab)
        fwd_rnd1( +96(%ebp), crypto_ft_tab)
        fwd_rnd2(+112(%ebp), crypto_ft_tab)
        fwd_rnd1(+128(%ebp), crypto_ft_tab)
        fwd_rnd2(+144(%ebp), crypto_fl_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

        add     $8,%esp                 // drop the save/restore scratch area
        mov     out_blk+12(%esp),%ebp   // +12: ebx, esi, edi still pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret

// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// In:    all three arguments are read from the stack (see ctx / out_blk /
//        in_blk offsets above).
// Out:   16 bytes stored at out_blk.
// Saved: ebp, ebx, esi, edi are pushed on entry and popped before ret.
// Clobb: eax, ecx, edx, flags.
// Stack: 4 saved registers plus 8 bytes of scratch used by save/restore.

.global aes_dec_blk

.extern crypto_it_tab
.extern crypto_il_tab

.align 4

aes_dec_blk:
        push    %ebp
        mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: two registers pushed so far
        push    %esi
        mov     klen(%ebp),%r3          // key size
        push    %edi
#if dkey != 0
        lea     dkey(%ebp),%ebp         // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        add     $16,%ebp                // increment to next round key
        cmp     $24,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp           // lea leaves the cmp flags intact for je
        je      3f                      // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      inv_rnd1( -64(%ebp), crypto_it_tab)     // 14 rounds for 256-bit key
        inv_rnd2( -48(%ebp), crypto_it_tab)
3:      inv_rnd1( -32(%ebp), crypto_it_tab)     // 12 rounds for 192-bit key
        inv_rnd2( -16(%ebp), crypto_it_tab)
4:      inv_rnd1(    (%ebp), crypto_it_tab)     // 10 rounds for 128-bit key
        inv_rnd2( +16(%ebp), crypto_it_tab)
        inv_rnd1( +32(%ebp), crypto_it_tab)
        inv_rnd2( +48(%ebp), crypto_it_tab)
        inv_rnd1( +64(%ebp), crypto_it_tab)
        inv_rnd2( +80(%ebp), crypto_it_tab)
        inv_rnd1( +96(%ebp), crypto_it_tab)
        inv_rnd2(+112(%ebp), crypto_it_tab)
        inv_rnd1(+128(%ebp), crypto_it_tab)
        inv_rnd2(+144(%ebp), crypto_il_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

        add     $8,%esp                 // drop the save/restore scratch area
        mov     out_blk+12(%esp),%ebp   // +12: ebx, esi, edi still pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret