/* Path: blob/main/sys/crypto/openssl/arm/armv4-gf2m.S */
/* 39482 views */
/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
/*
 * GF(2^m) multiplication for ARMv4/NEON (see the identification string at
 * the end of this file).  GNU as syntax, run through the C preprocessor.
 *
 * Because this file is generated, lasting changes belong in armv4-gf2m.pl.
 *
 * __ARM_ARCH__, __ARM_MAX_ARCH__ and ARMV7_NEON are not defined here;
 * presumably they come from arm_arch.h or the build flags (to be confirmed).
 */
#include "arm_arch.h"

#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif

.text

/*
 * mul_1x1_ialu: 32x32 -> 64-bit carry-less (polynomial over GF(2)) multiply
 * using integer instructions only.
 *
 * The symbol is not exported (.globl is absent) and the routine does not
 * follow a standard calling convention; the caller must set up every input
 * listed below.
 *
 * In:    r0  = b
 *        r1  = a
 *        r12 = 7<<2, the mask that turns a shifted 3-bit window of b into a
 *              byte offset into tab[]
 *        sp  -> 32 writable bytes, used as the eight-entry table tab[0..7]
 * Out:   r5  = low  32 bits of the product
 *        r4  = high 32 bits of the product
 * Clobb: r4-r9 and the flags.  r0-r3, r10, r12 and lr are never written.
 *
 * Method: the two top bits of a are cleared (a1) so that a1<<2 cannot
 * overflow 32 bits.  tab[i] holds the carry-less product a1*i for i = 0..7.
 * b is then consumed in 3-bit windows (the last window holds 2 bits); the
 * table entry for window k is xor-ed into the result shifted left by 3k
 * bits.  The two bits removed from a are folded back in at the end by
 * conditionally xor-ing b<<30 and b<<31.
 */
.type mul_1x1_ialu,%function
.align 5
mul_1x1_ialu:
    mov     r4,#0
    bic     r5,r1,#3<<30            @ a1=a&0x3fffffff
    str     r4,[sp,#0]              @ tab[0]=0
    add     r6,r5,r5                @ a2=a1<<1
    str     r5,[sp,#4]              @ tab[1]=a1
    eor     r7,r5,r6                @ a1^a2
    str     r6,[sp,#8]              @ tab[2]=a2
    mov     r8,r5,lsl#2             @ a4=a1<<2
    str     r7,[sp,#12]             @ tab[3]=a1^a2
    eor     r9,r5,r8                @ a1^a4
    str     r8,[sp,#16]             @ tab[4]=a4
    eor     r4,r6,r8                @ a2^a4
    str     r9,[sp,#20]             @ tab[5]=a1^a4
    eor     r7,r7,r8                @ a1^a2^a4
    str     r4,[sp,#24]             @ tab[6]=a2^a4
    and     r8,r12,r0,lsl#2         @ byte offset of tab[b & 0x7]
    str     r7,[sp,#28]             @ tab[7]=a1^a2^a4

    /*
     * Window loop, fully unrolled.  The offset for the next window is
     * computed while the previous table entry is being merged; r8/r9 and
     * r6/r7 alternate as offset and table-entry registers.
     */
    and     r9,r12,r0,lsr#1
    ldr     r5,[sp,r8]              @ tab[b       & 0x7]
    and     r8,r12,r0,lsr#4
    ldr     r7,[sp,r9]              @ tab[b >>  3 & 0x7]
    and     r9,r12,r0,lsr#7
    ldr     r6,[sp,r8]              @ tab[b >>  6 & 0x7]
    eor     r5,r5,r7,lsl#3          @ stall
    mov     r4,r7,lsr#29            @ high word starts with the spill of window 1
    ldr     r7,[sp,r9]              @ tab[b >>  9 & 0x7]

    and     r8,r12,r0,lsr#10
    eor     r5,r5,r6,lsl#6
    eor     r4,r4,r6,lsr#26
    ldr     r6,[sp,r8]              @ tab[b >> 12 & 0x7]

    and     r9,r12,r0,lsr#13
    eor     r5,r5,r7,lsl#9
    eor     r4,r4,r7,lsr#23
    ldr     r7,[sp,r9]              @ tab[b >> 15 & 0x7]

    and     r8,r12,r0,lsr#16
    eor     r5,r5,r6,lsl#12
    eor     r4,r4,r6,lsr#20
    ldr     r6,[sp,r8]              @ tab[b >> 18 & 0x7]

    and     r9,r12,r0,lsr#19
    eor     r5,r5,r7,lsl#15
    eor     r4,r4,r7,lsr#17
    ldr     r7,[sp,r9]              @ tab[b >> 21 & 0x7]

    and     r8,r12,r0,lsr#22
    eor     r5,r5,r6,lsl#18
    eor     r4,r4,r6,lsr#14
    ldr     r6,[sp,r8]              @ tab[b >> 24 & 0x7]

    and     r9,r12,r0,lsr#25
    eor     r5,r5,r7,lsl#21
    eor     r4,r4,r7,lsr#11
    ldr     r7,[sp,r9]              @ tab[b >> 27 & 0x7]

    tst     r1,#1<<30               @ was bit 30 of a set?
    and     r8,r12,r0,lsr#28
    eor     r5,r5,r6,lsl#24
    eor     r4,r4,r6,lsr#8
    ldr     r6,[sp,r8]              @ tab[b >> 30      ]

#ifdef __thumb2__
    itt     ne
#endif
    eorne   r5,r5,r0,lsl#30         @ bit 30 of a: add b<<30, low word
    eorne   r4,r4,r0,lsr#2          @                          high word
    tst     r1,#1<<31               @ was bit 31 of a set?
    eor     r5,r5,r7,lsl#27
    eor     r4,r4,r7,lsr#5
#ifdef __thumb2__
    itt     ne
#endif
    eorne   r5,r5,r0,lsl#31         @ bit 31 of a: add b<<31, low word
    eorne   r4,r4,r0,lsr#1          @                          high word
    eor     r5,r5,r6,lsl#30
    eor     r4,r4,r6,lsr#2

    mov     pc,lr
.size mul_1x1_ialu,.-mul_1x1_ialu

/*
 * bn_GF2m_mul_2x2: 64x64 -> 128-bit carry-less multiply.
 *
 * In:    r0   = pointer to four 32-bit result words r[0..3]
 *        r1   = a1, r2 = a0    (the two words of a)
 *        r3   = b1             (one word of b)
 *        [sp] = b0             (5th argument, the other word of b)
 * Out:   r[0..1] = low half, r[2..3] = high half of the product
 *
 * NOTE(review): this register layout suggests a C prototype along the lines
 * of  void bn_GF2m_mul_2x2(word *r, word a1, word a0, word b1, word b0);
 * confirm against the C caller.
 *
 * When built with __ARM_MAX_ARCH__>=7 the capability word OPENSSL_armcap_P
 * is tested at run time and the NEON path at .LNEON is taken if the
 * ARMV7_NEON bit is set.  Otherwise the integer path below performs three
 * mul_1x1_ialu calls (a1*b1, a0*b0 and (a1+a0)*(b1+b0)) and combines them.
 *
 * The integer path saves and restores r4-r10 and lr, and builds a
 * 32-byte-aligned scratch table below the saved registers; the original sp
 * is kept in the word just above that table.
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
    stmdb   sp!,{r10,lr}            @ r10 is needed as scratch for the lookup
    ldr     r12,.LOPENSSL_armcap
# if !defined(_WIN32)
    adr     r10,.LOPENSSL_armcap    @ literal holds an offset relative to itself
    ldr     r12,[r12,r10]
# endif
# if defined(__APPLE__) || defined(_WIN32)
    ldr     r12,[r12]
# endif
    tst     r12,#ARMV7_NEON
    itt     ne
    ldrne   r10,[sp],#8             @ NEON: restore r10, drop both saved words
    bne     .LNEON
    stmdb   sp!,{r4,r5,r6,r7,r8,r9} @ completes the r4-r10,lr frame
#else
    stmdb   sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
#endif
    mov     r10,r0                  @ reassign 1st argument
    mov     r0,r3                   @ r0=b1
    sub     r7,sp,#36               @ room for tab[8] plus the saved sp
    mov     r8,sp                   @ remember sp of the register frame
    and     r7,r7,#-32              @ round down to a 32-byte boundary
    ldr     r3,[sp,#32]             @ load b0
    mov     r12,#7<<2               @ window mask expected by mul_1x1_ialu
    mov     sp,r7                   @ allocate tab[8]
    str     r8,[r7,#32]             @ saved sp lives just above tab[7]

    bl      mul_1x1_ialu            @ a1·b1
    str     r5,[r10,#8]
    str     r4,[r10,#12]

    eor     r0,r0,r3                @ flip b0 and b1
    eor     r1,r1,r2                @ flip a0 and a1
    eor     r3,r3,r0
    eor     r2,r2,r1
    eor     r0,r0,r3
    eor     r1,r1,r2
    bl      mul_1x1_ialu            @ a0·b0
    str     r5,[r10]
    str     r4,[r10,#4]

    eor     r1,r1,r2                @ r1 = a0^a1
    eor     r0,r0,r3                @ r0 = b0^b1
    bl      mul_1x1_ialu            @ (a1+a0)·(b1+b0)
    /*
     * Combine: with m = r5 (low), r4 (high) the product just computed,
     *   r[1] ^= m.low  ^ r[0] ^ r[2]
     *   r[2] ^= m.high ^ r[1] ^ r[3]      (old r[1] on the right-hand side)
     * r[0] and r[3] are already final.
     */
    ldmia   r10,{r6,r7,r8,r9}       @ r6..r9 = r[0..3]
    eor     r5,r5,r4
    ldr     sp,[sp,#32]             @ destroy tab[8]
    eor     r4,r4,r7
    eor     r5,r5,r6
    eor     r4,r4,r8
    eor     r5,r5,r9
    eor     r4,r4,r9
    str     r4,[r10,#8]
    eor     r5,r5,r4
    str     r5,[r10,#4]

#if __ARM_ARCH__>=5
    ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
    ldmia   sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    tst     lr,#1
    moveq   pc,lr                   @ be binary compatible with V4, yet
.word 0xe12fff1e                    @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon

/*
 * NEON path.  Entered with sp back at its value on function entry, so
 * [sp] is the 5th argument again; lr still holds the return address.
 * d26 = A (r2 low word, r1 high word), d27 = B (b0 low word, r3 high word).
 * d28-d30 are 48-, 32- and 16-bit masks applied to the partial sums
 * before those sums are byte-shifted into place.
 */
.align 5
.LNEON:
    ldr         r12, [sp]               @ 5th argument
    vmov        d26, r2, r1
    vmov        d27, r12, r3
    vmov.i64    d28, #0x0000ffffffffffff
    vmov.i64    d29, #0x00000000ffffffff
    vmov.i64    d30, #0x000000000000ffff

    vext.8      d2, d26, d26, #1        @ A1
    vmull.p8    q1, d2, d27             @ F = A1*B
    vext.8      d0, d27, d27, #1        @ B1
    vmull.p8    q0, d26, d0             @ E = A*B1
    vext.8      d4, d26, d26, #2        @ A2
    vmull.p8    q2, d4, d27             @ H = A2*B
    vext.8      d16, d27, d27, #2       @ B2
    vmull.p8    q8, d26, d16            @ G = A*B2
    vext.8      d6, d26, d26, #3        @ A3
    veor        q1, q1, q0              @ L = E + F
    vmull.p8    q3, d6, d27             @ J = A3*B
    vext.8      d0, d27, d27, #3        @ B3
    veor        q2, q2, q8              @ M = G + H
    vmull.p8    q0, d26, d0             @ I = A*B3
    veor        d2, d2, d3              @ t0 = (L) (P0 + P1) << 8
    vand        d3, d3, d28
    vext.8      d16, d27, d27, #4       @ B4
    veor        d4, d4, d5              @ t1 = (M) (P2 + P3) << 16
    vand        d5, d5, d29
    vmull.p8    q8, d26, d16            @ K = A*B4
    veor        q3, q3, q0              @ N = I + J
    veor        d2, d2, d3
    veor        d4, d4, d5
    veor        d6, d6, d7              @ t2 = (N) (P4 + P5) << 24
    vand        d7, d7, d30
    vext.8      q1, q1, q1, #15
    veor        d16, d16, d17           @ t3 = (K) (P6 + P7) << 32
    vmov.i64    d17, #0
    vext.8      q2, q2, q2, #14
    veor        d6, d6, d7
    vmull.p8    q0, d26, d27            @ D = A*B
    vext.8      q8, q8, q8, #12
    vext.8      q3, q3, q3, #13
    veor        q1, q1, q2
    veor        q3, q3, q8
    veor        q0, q0, q1
    veor        q0, q0, q3

    vst1.32     {q0}, [r0]              @ write all four result words
    bx          lr                      @ return; lr was never overwritten on this path
#endif
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
/*
 * Literal used by the capability lookup at the top of bn_GF2m_mul_2x2:
 * an absolute address on _WIN32, otherwise an offset relative to this word.
 */
.align 5
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.
# endif
#endif
/* ASCII: "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 5

#if __ARM_MAX_ARCH__>=7

.hidden OPENSSL_armcap_P
#endif