Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/x86/ghash-pclmul.S
170891 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
2
/*
3
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
4
* instructions. This file contains the accelerated part of the GHASH
5
* implementation. More information about PCLMULQDQ can be found at:
6
*
7
* https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
8
*
9
* Copyright (c) 2009 Intel Corp.
10
* Author: Huang Ying <[email protected]>
11
* Vinodh Gopal
12
* Erdinc Ozturk
13
* Deniz Karakoyunlu
14
*/
15
16
#include <linux/linkage.h>
17
#include <asm/frame.h>
18
19
/*
 * pshufb control mask.  Stored little-endian, byte i of the mask holds the
 * index 15 - i, so "pshufb .Lbswap_mask, reg" reverses the 16 bytes of reg.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.p2align 4				/* 16-byte alignment, needed for the movaps load */
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
23
24
/* Register roles shared by all routines in this file. */
#define ACC	%xmm0	/* accumulator: operand1 in, product out */
#define KEY	%xmm1	/* operand2 of the multiplication (read only) */
#define T1	%xmm2	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T2	%xmm3	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T3	%xmm4	/* scratch, clobbered by __clmul_gf128mul_ble */
#define BSWAP	%xmm5	/* holds .Lbswap_mask in ghash_blocks_pclmul */
#define IN1	%xmm6	/* current input block in ghash_blocks_pclmul */
31
32
.text
33
34
/*
 * __clmul_gf128mul_ble: internal ABI
 *
 * Carry-less multiply of two 128-bit operands (three PCLMULQDQ products,
 * Karatsuba style) followed by a two-phase shift/XOR reduction of the
 * 256-bit product back to 128 bits.
 *
 * input:
 *	ACC:	operand1
 *	KEY:	operand2, hash_key << 1 mod poly
 * output:
 *	ACC:	operand1 * operand2 mod poly
 * changed:
 *	T1, T2, T3
 *
 * KEY is only read.  Notation below: a1:a0 are the high:low qwords of
 * operand1, b1:b0 those of operand2, and '+' is XOR.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/* Karatsuba inputs: low qword of T3 = b1 + b0, low qword of T2 = a1 + a0 */
	pshufd $0x4e, KEY, T3		# T3 = KEY with its qwords swapped
	pxor KEY, T3
	pshufd $0x4e, ACC, T2		# T2 = ACC with its qwords swapped
	pxor ACC, T2
	movaps ACC, T1			# copy of operand1, consumed by a1 * b1

	/* the three 64x64 -> 128 bit carry-less products */
	pclmulqdq $0x00, T3, T2		# T2 = (a1 + a0) * (b1 + b0)
	pclmulqdq $0x11, KEY, T1	# T1 = a1 * b1
	pclmulqdq $0x00, KEY, ACC	# ACC = a0 * b0
	pxor T1, T2
	pxor ACC, T2			# T2 = a0 * b1 + a1 * b0

	/* fold the middle term into the 256-bit product <T1:ACC> */
	movaps T2, T3
	psrldq $8, T2			# upper qword of the middle term -> low qword
	pslldq $8, T3			# lower qword of the middle term -> high qword
	pxor T2, T1
	pxor T3, ACC			# <T1:ACC> = carry-less product

	/*
	 * First phase of the reduction.  With x = a qword of ACC, the chain
	 * below leaves (x << 63) + (x << 62) + (x << 57) in that qword of T3.
	 */
	movaps ACC, T3
	psllq $1, T3
	pxor ACC, T3
	psllq $5, T3
	pxor ACC, T3
	psllq $57, T3
	movaps T3, T2
	psrldq $8, T3			# upper qword feeds the high half
	pslldq $8, T2			# lower qword feeds the low half
	pxor T3, T1
	pxor T2, ACC

	/*
	 * Second phase of the reduction.  With x = a qword of ACC, the chain
	 * below leaves (x >> 7) + (x >> 2) + (x >> 1) in that qword of T2.
	 */
	movaps ACC, T2
	psrlq $5, T2
	pxor ACC, T2
	psrlq $1, T2
	pxor ACC, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, ACC			# ACC = reduced 128-bit result
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)
90
91
/*
 * void polyval_mul_pclmul(struct polyval_elem *a,
 *			   const struct polyval_elem *b)
 *
 * Replaces *a with the product of *a and *b as computed by
 * __clmul_gf128mul_ble.  Both elements are accessed with unaligned
 * 16-byte moves.
 *
 * In:		%rdi = a, %rsi = b
 * Changed:	ACC, KEY, T1, T2, T3
 */
SYM_FUNC_START(polyval_mul_pclmul)
	FRAME_BEGIN
	movups (%rsi), KEY		# operand2 = *b
	movups (%rdi), ACC		# operand1 = *a
	call __clmul_gf128mul_ble	# ACC = operand1 * operand2 mod poly
	movups ACC, (%rdi)		# *a = product
	FRAME_END
	RET
SYM_FUNC_END(polyval_mul_pclmul)
104
105
/*
 * void ghash_blocks_pclmul(struct polyval_elem *acc,
 *			    const struct polyval_elem *key,
 *			    const u8 *data, size_t nblocks)
 *
 * For each of the nblocks 16-byte blocks at data: byte-reverse the block
 * with .Lbswap_mask, XOR it into the accumulator, then multiply the
 * accumulator by *key with __clmul_gf128mul_ble.  The final accumulator is
 * written back to *acc.
 *
 * In:		%rdi = acc, %rsi = key, %rdx = data, %rcx = nblocks
 * Changed:	%rcx, %rdx, flags, ACC, KEY, T1, T2, T3, BSWAP, IN1
 *
 * nblocks == 0 is a no-op: nothing is read from data and *acc is left
 * untouched.
 */
SYM_FUNC_START(ghash_blocks_pclmul)
	FRAME_BEGIN
	/*
	 * The loop below tests its counter only after the body has run, so a
	 * zero count must be filtered out here; otherwise dec would wrap
	 * %rcx and the loop would read far beyond the data buffer.
	 */
	test %rcx, %rcx
	jz .Lno_blocks
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), ACC		# ACC = *acc
	movups (%rsi), KEY		# KEY = *key
.align 4
.Lnext_block:
	movups (%rdx), IN1		# load the next 16-byte block
	pshufb BSWAP, IN1		# reverse its byte order
	pxor IN1, ACC			# fold the block into the accumulator
	call __clmul_gf128mul_ble	# ACC = ACC * KEY mod poly
	add $16, %rdx			# advance to the following block
	dec %rcx			# one block fewer to go
	jnz .Lnext_block
	movups ACC, (%rdi)		# *acc = updated accumulator
.Lno_blocks:
	FRAME_END
	RET
SYM_FUNC_END(ghash_blocks_pclmul)
128
129