Path: blob/master/arch/x86/crypto/ghash-clmulni-intel_asm.S
26442 views
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains accelerated part of ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <[email protected]>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 *
 * Syntax: GNU as, AT&T operand order (source, destination), run through
 * the C preprocessor.
 */

#include <linux/linkage.h>
#include <asm/frame.h>

/*
 * pshufb control mask.  Stored little-endian, so byte 0 of the mask is
 * 0x0f, byte 1 is 0x0e, ...: shuffling with it reverses the order of the
 * 16 bytes in an XMM register.  16-byte aligned so it can be loaded with
 * movaps.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Register roles shared by every routine in this file. */
#define DATA	%xmm0	/* running hash value / multiplication operand 1 */
#define SHASH	%xmm1	/* hash key operand (operand 2) */
#define T1	%xmm2	/* scratch */
#define T2	%xmm3	/* scratch */
#define T3	%xmm4	/* scratch */
#define BSWAP	%xmm5	/* holds .Lbswap_mask */
#define IN1	%xmm6	/* current 16-byte input block */

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	DATA:			operand1
 *	SHASH:			operand2, hash_key << 1 mod poly
 * output:
 *	DATA:			operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * No general-purpose register and no memory is touched, so callers may
 * keep live values in integer registers across the call.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/*
	 * Three carry-less multiplies instead of four: the middle term is
	 * derived from (a1 + a0) * (b1 + b0).  pshufd $0b01001110 swaps
	 * the two 64-bit halves, so the xor leaves a1 + a0 (resp. b1 + b0)
	 * in both halves of T2 (resp. T3).
	 */
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2
	pshufd $0b01001110, SHASH, T3
	pxor DATA, T2
	pxor SHASH, T3

	pclmulqdq $0x00, SHASH, DATA	/* DATA = a0 * b0 */
	pclmulqdq $0x11, SHASH, T1	/* T1 = a1 * b1 */
	pclmulqdq $0x00, T3, T2		/* T2 = (a1 + a0) * (b1 + b0) */
	pxor DATA, T2
	pxor T1, T2			/* T2 = a0 * b1 + a1 * b0 */

	/* Fold the 128-bit middle term across the low/high halves. */
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			/* <T1:DATA> is result of */
					/* carry-less multiplication */

	/* first phase of the reduction */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	/* second phase of the reduction */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const le128 *shash)
 *
 * In:	%rdi = dst, 16 bytes, read and then overwritten with the product
 *	%rsi = shash, 16-byte operand loaded into SHASH
 * Clobbers: DATA, SHASH, T1, T2, T3, BSWAP
 *
 * Both pointers are accessed with movups, so neither has to be aligned.
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP
	pshufb BSWAP, DATA		/* byte-reverse before multiplying */
	call __clmul_gf128mul_ble
	pshufb BSWAP, DATA		/* restore the in-memory byte order */
	movups DATA, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_mul)

/*
 * int clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			  const le128 *shash);
 *
 * In:	%rdi = dst, 16-byte hash state, updated in place
 *	%rsi = src, input data
 *	%edx = srclen, number of input bytes
 *	%rcx = shash, 16-byte operand loaded into SHASH
 * Out:	%eax = number of trailing input bytes that were not consumed
 *	       (always less than 16; equals srclen when srclen < 16)
 * Clobbers: %rsi, %rdx, flags, DATA, SHASH, T1, T2, T3, BSWAP, IN1
 *
 * Only whole 16-byte blocks are processed.  If srclen is below 16, dst is
 * neither read nor written.
 *
 * srclen is a 32-bit argument, so only %edx is consulted: the calling
 * convention does not define bits 63:32 of %rdx for it.  Every length
 * comparison uses the unsigned condition codes.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %edx
	jb .Lupdate_just_ret		/* not even one full block */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
	/* Invariant: at least 16 unread bytes remain at (%rsi). */
	movups (%rsi), IN1
	pshufb BSWAP, IN1
	pxor IN1, DATA			/* fold the block into the state */
	call __clmul_gf128mul_ble	/* integer registers are preserved */
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		/* another full block is available */
	pshufb BSWAP, DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	mov %edx, %eax			/* return the leftover byte count */
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_update)