Path: blob/master/arch/x86/crypto/ghash-clmulni-intel_asm.S
10817 views
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains accelerated part of ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

/*
 * Syntax: GNU as, AT&T operand order (source, destination), preprocessed
 * by cpp.  Target: x86-64; arguments are taken from %rdi, %rsi, %rdx, %rcx.
 * Every exported function overwrites some of %xmm0-%xmm6 and does not save
 * them.
 */

#include <linux/linkage.h>
#include <asm/inst.h>

/*
 * The constants below are only ever read, so they live in .rodata.
 * They are used as memory operands of movaps/pcmpeqd/pand, which require
 * 16-byte alignment; each .octa is 16 bytes, so one .align covers all three.
 */
.section .rodata

.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
.Lpoly:
	.octa 0xc2000000000000000000000000000001
.Ltwo_one:
	.octa 0x00000001000000000000000000000001

#define DATA	%xmm0
#define SHASH	%xmm1
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4
#define BSWAP	%xmm5
#define IN1	%xmm6

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	DATA:			operand1
 *	SHASH:			operand2, hash_key << 1 mod poly
 * output:
 *	DATA:			operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * SHASH, BSWAP and IN1 are left untouched, so callers may keep the key
 * and the byte-swap mask live across the call.
 */
__clmul_gf128mul_ble:
	/*
	 * Three carry-less multiplies (Karatsuba) instead of four:
	 * T2/T3 receive (high ^ low) of each operand in their low qword.
	 */
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2	# swap the two qwords of DATA
	pshufd $0b01001110, SHASH, T3	# swap the two qwords of SHASH
	pxor DATA, T2
	pxor SHASH, T3

	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	/* fold the middle term into the 256-bit product */
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	/* first phase of the reduction */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	/* second phase of the reduction */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA
	ret
ENDPROC(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * In:	%rdi = dst	16 bytes, read and written (unaligned access is used)
 *	%rsi = shash	16 bytes, read only
 *
 * Loads *dst, byte-swaps it with .Lbswap_mask, multiplies it by *shash via
 * __clmul_gf128mul_ble, swaps the product back and stores it to *dst.
 * Overwrites DATA, SHASH, T1-T3 and BSWAP.
 */
ENTRY(clmul_ghash_mul)
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
	ret
ENDPROC(clmul_ghash_mul)

/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const be128 *shash);
 *
 * In:	%rdi = dst	16-byte running hash, read and written
 *	%rsi = src	input bytes
 *	%edx = srclen	number of input bytes
 *	%rcx = shash	16 bytes, read only
 *
 * For every complete 16-byte block of src: xor the byte-swapped block into
 * the hash, then multiply by *shash.  A trailing partial block (fewer than
 * 16 bytes) is not consumed; if srclen < 16 nothing is read or written.
 * Overwrites DATA, SHASH, T1-T3, BSWAP, IN1, %rsi, %rdx and the flags.
 */
ENTRY(clmul_ghash_update)
	/*
	 * srclen is a 32-bit argument; the calling convention does not
	 * promise anything about bits 63:32 of %rdx, and the code below
	 * works on the full 64-bit register.  Writing %edx clears them.
	 */
	movl %edx, %edx
	cmp $16, %rdx
	jb .Lupdate_just_ret	# check length
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	/* invariant: %rdx >= 16 bytes remain at %rsi */
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble
	sub $16, %rdx
	add $16, %rsi
	cmp $16, %rdx
	jae .Lupdate_loop	# unsigned, same as the entry check
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	ret
ENDPROC(clmul_ghash_update)

/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 * In:	%rdi = shash	16 bytes, written
 *	%rsi = key	16 bytes, read only
 *
 * Overwrites %xmm0-%xmm2 and BSWAP.
 */
ENTRY(clmul_ghash_setkey)
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rsi), %xmm0
	PSHUFB_XMM BSWAP %xmm0
	/*
	 * 128-bit left shift by one, built from 64-bit lane shifts:
	 * %xmm1 collects the top bit of each qword, which is then carried
	 * from the low qword into the high one; %xmm2 keeps the bit that
	 * fell off the top of the high qword.
	 */
	movaps %xmm0, %xmm1
	psllq $1, %xmm0
	psrlq $63, %xmm1
	movaps %xmm1, %xmm2
	pslldq $8, %xmm1
	psrldq $8, %xmm2
	por %xmm1, %xmm0
	/*
	 * reduction: spread the shifted-out bit to dwords 0 and 3, compare
	 * against .Ltwo_one to get an all-ones mask when that bit was set
	 * (zero in dwords 0 and 3 otherwise), and xor in the masked .Lpoly.
	 */
	pshufd $0b00100100, %xmm2, %xmm1
	pcmpeqd .Ltwo_one(%rip), %xmm1
	pand .Lpoly(%rip), %xmm1
	pxor %xmm1, %xmm0
	movups %xmm0, (%rdi)
	ret
ENDPROC(clmul_ghash_setkey)