Path: blob/master/lib/crypto/powerpc/sha1-powerpc-asm.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <[email protected]>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

#ifdef __BIG_ENDIAN__
#define LWZ(rt, d, ra)	\
	lwz	rt,d(ra)
#else
#define LWZ(rt, d, ra)	\
	li	rt,d;	\
	lwbrx	rt,rt,ra
#endif

/*
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/* We use registers 16 - 31 for the W values */
#define W(t)	(((t)%16)+16)

#define LOADW(t)			\
	LWZ(W(t),(t)*4,r4)

#define STEPD0_LOAD(t)			\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	or	r6,r6,r0;		\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);		\
	LWZ(W((t)+4),((t)+4)*4,r4);	\
	rotlwi	RB(t),RB(t),30;		\
	add	RT(t),RT(t),r14

#define STEPD0_UPDATE(t)		\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	or	r6,r6,r0;		\
	add	r0,RE(t),r15;		\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEPD1(t)			\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	xor	r6,r6,RD(t);		\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);		\
	add	RT(t),RT(t),r0

#define STEPD1_UPDATE(t)		\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	xor	r6,r6,RD(t);		\
	add	r0,RE(t),r15;		\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEPD2_UPDATE(t)		\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;		\
	or	r6,r6,r0;		\
	rotlwi	RB(t),RB(t),30;		\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	or	r6,r6,r0;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEP0LD4(t)			\
	STEPD0_LOAD(t);			\
	STEPD0_LOAD((t)+1);		\
	STEPD0_LOAD((t)+2);		\
	STEPD0_LOAD((t)+3)

#define STEPUP4(t, fn)			\
	STEP##fn##_UPDATE(t);		\
	STEP##fn##_UPDATE((t)+1);	\
	STEP##fn##_UPDATE((t)+2);	\
	STEP##fn##_UPDATE((t)+3)

#define STEPUP20(t, fn)			\
	STEPUP4(t, fn);			\
	STEPUP4((t)+4, fn);		\
	STEPUP4((t)+8, fn);		\
	STEPUP4((t)+12, fn);		\
	STEPUP4((t)+16, fn)

_GLOBAL(powerpc_sha_transform)
	PPC_STLU r1,-INT_FRAME_SIZE(r1)
	SAVE_GPRS(14, 31, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
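	/*
	 * The loads of the previous hash value (into r16 - r20) are
	 * interleaved with the last four rounds, presumably so the
	 * final additions below can issue without stalling on memory.
	 */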
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	REST_GPRS(14, 31, r1)
	addi	r1,r1,INT_FRAME_SIZE
	blr
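/*
 * A hedged sketch of the C-side view, inferred from the register usage
 * above (r3 = state pointer, r4 = data pointer); the authoritative
 * prototype lives in the C glue code, not in this file:
 *
 *	void powerpc_sha_transform(u32 state[5], const u8 *data);
 *
 * Each call consumes one 64-byte block at *data (loaded into W(0)-W(15)
 * via LWZ, which byte-swaps on little-endian) and updates the five
 * 32-bit state words in place.
 */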