Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/ghash-clmulni-intel_asm.S
26442 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the GHASH
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 */
15
16
#include <linux/linkage.h>
17
#include <asm/frame.h>
18
19
/*
 * 16-byte shuffle control for pshufb.  Stored little-endian, the bytes in
 * memory are 0x0f, 0x0e, ..., 0x00, so destination byte i is taken from
 * source byte (15 - i): the whole 128-bit register is byte-reversed.
 * Kept 16-byte aligned because it is loaded with movaps.
 */
	.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
	.align		16
.Lbswap_mask:
	.octa		0x000102030405060708090a0b0c0d0e0f
23
24
/* Symbolic names for the XMM registers used throughout this file. */
#define DATA	%xmm0	/* running hash value / operand1 / result */
#define SHASH	%xmm1	/* hash key operand (operand2) */
#define T1	%xmm2	/* scratch for the field multiply */
#define T2	%xmm3	/* scratch for the field multiply */
#define T3	%xmm4	/* scratch for the field multiply */
#define BSWAP	%xmm5	/* byte-reversal mask loaded from .Lbswap_mask */
#define IN1	%xmm6	/* current 16-byte input block */

	.text
33
34
/*
 * __clmul_gf128mul_ble: internal ABI
 *
 * Carry-less 128x128-bit multiply built from three PCLMULQDQ operations
 * (Karatsuba split into a0*b0, a1*b1 and (a1+a0)*(b1+b0)), followed by a
 * two-phase shift/XOR reduction of the 256-bit product back to 128 bits.
 * Uses no memory operands and no general-purpose registers.
 *
 * input:
 *	DATA:	operand1
 *	SHASH:	operand2, hash_key << 1 mod poly
 * output:
 *	DATA:	operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/* Prepare the Karatsuba middle-term inputs before DATA is overwritten. */
	pshufd $0x4e, DATA, T2		/* T2 = DATA with its 64-bit halves swapped */
	pxor DATA, T2			/* low qword of T2 = a1 + a0 */
	pshufd $0x4e, SHASH, T3		/* T3 = SHASH with its 64-bit halves swapped */
	pxor SHASH, T3			/* low qword of T3 = b1 + b0 */
	movaps DATA, T1			/* copy of operand1 for the high product */

	/* The three 64x64 carry-less products (mutually independent). */
	pclmulqdq $0x00, T3, T2		/* T2 = (a1 + a0) * (b1 + b0) */
	pclmulqdq $0x11, SHASH, T1	/* T1 = a1 * b1 */
	pclmulqdq $0x00, SHASH, DATA	/* DATA = a0 * b0 */
	pxor T1, T2
	pxor DATA, T2			/* T2 = a0 * b1 + a1 * b0 */

	/* Add the middle term, straddling the boundary between T1 and DATA. */
	movaps T2, T3
	psrldq $8, T2			/* upper qword of middle term -> low half */
	pxor T2, T1
	pslldq $8, T3			/* lower qword of middle term -> high half */
	pxor T3, DATA			/* <T1:DATA> is result of */
					/* carry-less multiplication */

	/*
	 * First phase of the reduction.  Per 64-bit lane:
	 *   T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57)
	 * which is then split across DATA (shifted up) and T1 (shifted down).
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	psrldq $8, T3
	pxor T3, T1
	pslldq $8, T2
	pxor T2, DATA

	/*
	 * Second phase of the reduction.  Per 64-bit lane:
	 *   T2 = (DATA >> 7) ^ (DATA >> 2) ^ (DATA >> 1)
	 * and the final result is DATA ^ T1 ^ T2.
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)
90
91
/*
 * void clmul_ghash_mul(char *dst, const le128 *shash)
 *
 * Multiplies the 16-byte value at dst by the key at shash and stores the
 * product back to dst.  The value is byte-reversed before the multiply and
 * again before the store.
 *
 * In:       %rdi = dst, %rsi = shash (both accessed with unaligned loads)
 * Out:      16 bytes written to (%rdi)
 * Changed:  DATA, SHASH, T1, T2, T3, BSWAP
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rsi), SHASH		/* key is used as stored, no byte swap */
	movups (%rdi), DATA
	pshufb BSWAP, DATA		/* byte-reverse the value for the multiply */
	call __clmul_gf128mul_ble	/* DATA = DATA * SHASH mod poly */
	pshufb BSWAP, DATA		/* restore the original byte order */
	movups DATA, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_mul)
104
105
/*
 * int clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const le128 *shash);
 *
 * Folds every complete 16-byte block of src into the hash state at dst:
 * for each block, state = (state ^ block) * shash mod poly.
 *
 * In:       %rdi = dst, %rsi = src, %edx = srclen, %rcx = shash
 * Out:      %eax = number of trailing source bytes (< 16) that were not
 *           consumed; equals srclen when srclen < 16, in which case dst
 *           is neither read nor written.
 * Changed:  %rsi, %edx, DATA, SHASH, T1, T2, T3, BSWAP, IN1
 *
 * srclen is a 32-bit argument, so only %edx is examined: the upper half of
 * %rdx is not defined by the calling convention.  All length comparisons
 * are unsigned.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %edx
	jb .Lupdate_just_ret		/* less than one full block: nothing to do */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	pshufb BSWAP, DATA		/* byte-reverse the state for the multiply */
.align 4
.Lupdate_loop:
	/* Invariant: %edx >= 16 bytes remain at (%rsi). */
	movups (%rsi), IN1
	pshufb BSWAP, IN1
	pxor IN1, DATA			/* state ^= block */
	call __clmul_gf128mul_ble	/* state *= shash (mod poly) */
	sub $16, %edx			/* cannot wrap: %edx >= 16 here */
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		/* unsigned, matching the entry check */
	pshufb BSWAP, DATA		/* restore the original byte order */
	movups DATA, (%rdi)
.Lupdate_just_ret:
	mov %edx, %eax			/* return the leftover byte count */
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_update)
134
135