/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains accelerated part of ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <[email protected]>
 *	    Vinodh Gopal
 *	    Erdinc Ozturk
 *	    Deniz Karakoyunlu
 */

#include <linux/linkage.h>
#include <asm/frame.h>

/*
 * pshufb control mask that reverses the order of the 16 bytes in an XMM
 * register.  It is loaded with movaps, hence the 16-byte alignment.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Register roles shared by every routine in this file. */
#define ACC	%xmm0	/* accumulator / operand1 / result */
#define KEY	%xmm1	/* operand2 (hash key) */
#define T1	%xmm2	/* scratch */
#define T2	%xmm3	/* scratch */
#define T3	%xmm4	/* scratch */
#define BSWAP	%xmm5	/* byte-reversal mask (.Lbswap_mask) */
#define IN1	%xmm6	/* current input block */

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	ACC:	operand1
 *	KEY:	operand2, hash_key << 1 mod poly
 * output:
 *	ACC:	operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * KEY is only read, so callers may keep it live across calls.  No
 * general-purpose register and no memory is touched.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/*
	 * Set up the three products: T2 and T3 receive the XOR of the two
	 * 64-bit halves of ACC and KEY respectively (pshufd 0b01001110
	 * swaps the halves, pxor folds them together).
	 */
	movaps		ACC, T1
	pshufd		$0b01001110, ACC, T2
	pshufd		$0b01001110, KEY, T3
	pxor		ACC, T2
	pxor		KEY, T3

	pclmulqdq	$0x00, KEY, ACC		/* ACC = a0 * b0 */
	pclmulqdq	$0x11, KEY, T1		/* T1 = a1 * b1 */
	pclmulqdq	$0x00, T3, T2		/* T2 = (a1 + a0) * (b1 + b0) */
	pxor		ACC, T2
	pxor		T1, T2			/* T2 = a0 * b1 + a1 * b0 */

	/* Split the middle term across the low and high 128-bit halves. */
	movaps		T2, T3
	pslldq		$8, T3
	psrldq		$8, T2
	pxor		T3, ACC
	pxor		T2, T1			/* <T1:ACC> is result of */
						/* carry-less multiplication */

	/* first phase of the reduction */
	movaps		ACC, T3
	psllq		$1, T3
	pxor		ACC, T3
	psllq		$5, T3
	pxor		ACC, T3
	psllq		$57, T3
	movaps		T3, T2
	pslldq		$8, T2
	psrldq		$8, T3
	pxor		T2, ACC
	pxor		T3, T1

	/* second phase of the reduction */
	movaps		ACC, T2
	psrlq		$5, T2
	pxor		ACC, T2
	psrlq		$1, T2
	pxor		ACC, T2
	psrlq		$1, T2
	pxor		T2, T1
	pxor		T1, ACC			/* ACC = reduced product */
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)

/*
 * void polyval_mul_pclmul(struct polyval_elem *a,
 *			   const struct polyval_elem *b)
 *
 * Multiply the 16-byte element at 'a' by the 16-byte element at 'b' using
 * __clmul_gf128mul_ble and write the product back to 'a'.
 *
 * In:	%rdi = a (read and written), %rsi = b (read only)
 * Clobbers: ACC, KEY, T1, T2, T3
 *
 * Both pointers are accessed with movups, so no alignment is required.
 */
SYM_FUNC_START(polyval_mul_pclmul)
	FRAME_BEGIN
	movups		(%rdi), ACC
	movups		(%rsi), KEY
	call		__clmul_gf128mul_ble
	movups		ACC, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(polyval_mul_pclmul)

/*
 * void ghash_blocks_pclmul(struct polyval_elem *acc,
 *			    const struct polyval_elem *key,
 *			    const u8 *data, size_t nblocks)
 *
 * Fold 'nblocks' consecutive 16-byte blocks starting at 'data' into the
 * accumulator at 'acc'.  Each block is byte-reversed, XORed into the
 * accumulator, and the accumulator is then multiplied by 'key' using
 * __clmul_gf128mul_ble.
 *
 * In:	%rdi = acc (read and written), %rsi = key (read only),
 *	%rdx = data, %rcx = nblocks
 * Clobbers: ACC, KEY, T1, T2, T3, BSWAP, IN1, %rcx, %rdx, flags
 *
 * acc, key and data are accessed with movups, so no alignment is required.
 * When nblocks is zero the function returns without reading 'data' and
 * without modifying *acc.
 */
SYM_FUNC_START(ghash_blocks_pclmul)
	FRAME_BEGIN
	/*
	 * The loop below is bottom-tested (dec/jnz), so a zero count would
	 * wrap around and run off the end of 'data'.  Bail out early instead.
	 */
	test		%rcx, %rcx
	jz		.Lno_blocks
	movaps		.Lbswap_mask(%rip), BSWAP
	movups		(%rdi), ACC
	movups		(%rsi), KEY
.align 4
.Lnext_block:
	movups		(%rdx), IN1
	pshufb		BSWAP, IN1		/* reverse the block's bytes */
	pxor		IN1, ACC
	call		__clmul_gf128mul_ble	/* ACC = ACC * KEY mod poly */
	add		$16, %rdx		/* advance to the next block */
	dec		%rcx
	jnz		.Lnext_block
	movups		ACC, (%rdi)
.Lno_blocks:
	FRAME_END
	RET
SYM_FUNC_END(ghash_blocks_pclmul)