Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_blake2/impl/blake2s-round.h
12 views
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2S_ROUND_H__
#define __BLAKE2S_ROUND_H__

/*
 * LOAD(p): 128-bit load from p via _mm_load_si128 (the aligned-load intrinsic).
 * The cast keeps the const qualifier so read-only buffers can be passed
 * without discarding it.
 */
#define LOAD(p)  _mm_load_si128( (const __m128i *)(p) )
/* STORE(p,r): 128-bit store of r to p via _mm_store_si128 (the aligned-store intrinsic). */
#define STORE(p,r) _mm_store_si128( (__m128i *)(p), (r) )

/*
 * LOADU(p): 128-bit load from p via _mm_loadu_si128 (the unaligned-load intrinsic).
 * The cast keeps the const qualifier so read-only buffers can be passed
 * without discarding it.
 */
#define LOADU(p)  _mm_loadu_si128( (const __m128i *)(p) )
/* STOREU(p,r): 128-bit store of r to p via _mm_storeu_si128 (the unaligned-store intrinsic). */
#define STOREU(p,r) _mm_storeu_si128( (__m128i *)(p), (r) )

/* TOF(reg): view an integer vector as a packed-float vector (_mm_castsi128_ps). */
#define TOF(reg) _mm_castsi128_ps((reg))
/* TOI(reg): view a packed-float vector as an integer vector (_mm_castps_si128). */
#define TOI(reg) _mm_castps_si128((reg))

/*
 * LIKELY(x): hint that the condition x is expected to be true.
 * The operand is normalized with !! so that pointers and operands wider than
 * long can be passed (__builtin_expect takes long arguments); the macro
 * therefore yields 0 or 1. Compilers without __builtin_expect get a plain
 * truth test with the same value.
 */
#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#else
#define LIKELY(x) (!!(x))
#endif


/* Microarchitecture-specific macros */
/*
 * _mm_roti_epi32(r, c): per-32-bit-lane rotate built from shifts when XOP is
 * not available. The macros in this header pass a negative c; -(c) is used as
 * the right-shift count and 32-(-(c)) as the left-shift count, so c == -n
 * rotates each lane right by n bits.
 *
 * Notes:
 *  - r is expanded more than once; pass a plain variable, not an expression
 *    with side effects.
 *  - The SSSE3 variant references r8 and r16, which are NOT parameters: they
 *    must be in scope wherever the macro is expanded (presumably byte-shuffle
 *    masks for the 8- and 16-bit rotations -- confirm at the definition site).
 */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi32(r, c) ( \
                (8==-(c)) ? _mm_shuffle_epi8((r),r8) \
              : (16==-(c)) ? _mm_shuffle_epi8((r),r16) \
              : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) )
#else
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) ))
#endif
#else
/* ... */
/* NOTE(review): with HAVE_XOP nothing is defined here; _mm_roti_epi32 is
   presumably supplied by the XOP intrinsics header -- confirm. */
#endif


/*
 * G1(row1,row2,row3,row4,buf): first half of the mixing step, applied to all
 * 32-bit lanes of the four row vectors at once. Updates its row arguments in
 * place:
 *   row1 += buf + row2;  row4 = (row4 ^ row1) rotated with count -16;
 *   row3 += row4;        row2 = (row2 ^ row3) rotated with count -12.
 * Wrapped in do { } while (0) so that "G1(...);" is a single statement and is
 * safe inside unbraced if/else.
 */
#define G1(row1,row2,row3,row4,buf) \
  do { \
    (row1) = _mm_add_epi32( _mm_add_epi32( (row1), (buf) ), (row2) ); \
    (row4) = _mm_xor_si128( (row4), (row1) ); \
    (row4) = _mm_roti_epi32( (row4), -16 ); \
    (row3) = _mm_add_epi32( (row3), (row4) ); \
    (row2) = _mm_xor_si128( (row2), (row3) ); \
    (row2) = _mm_roti_epi32( (row2), -12 ); \
  } while (0)

/*
 * G2(row1,row2,row3,row4,buf): second half of the mixing step; identical in
 * shape to G1 but with rotation counts -8 and -7. Updates its row arguments in
 * place:
 *   row1 += buf + row2;  row4 = (row4 ^ row1) rotated with count -8;
 *   row3 += row4;        row2 = (row2 ^ row3) rotated with count -7.
 * Wrapped in do { } while (0) so that "G2(...);" is a single statement and is
 * safe inside unbraced if/else.
 */
#define G2(row1,row2,row3,row4,buf) \
  do { \
    (row1) = _mm_add_epi32( _mm_add_epi32( (row1), (buf) ), (row2) ); \
    (row4) = _mm_xor_si128( (row4), (row1) ); \
    (row4) = _mm_roti_epi32( (row4), -8 ); \
    (row3) = _mm_add_epi32( (row3), (row4) ); \
    (row2) = _mm_xor_si128( (row2), (row3) ); \
    (row2) = _mm_roti_epi32( (row2), -7 ); \
  } while (0)

/*
 * DIAGONALIZE(row1,row2,row3,row4): permute the 32-bit lanes of row2, row3 and
 * row4 in place; row1 is accepted but left untouched. After the shuffles,
 * result lane i holds source lane (i+1) mod 4 for row2, (i+2) mod 4 for row3
 * and (i+3) mod 4 for row4.
 * Wrapped in do { } while (0) so that "DIAGONALIZE(...);" is a single
 * statement and is safe inside unbraced if/else.
 */
#define DIAGONALIZE(row1,row2,row3,row4) \
  do { \
    (row4) = _mm_shuffle_epi32( (row4), _MM_SHUFFLE(2,1,0,3) ); \
    (row3) = _mm_shuffle_epi32( (row3), _MM_SHUFFLE(1,0,3,2) ); \
    (row2) = _mm_shuffle_epi32( (row2), _MM_SHUFFLE(0,3,2,1) ); \
  } while (0)

/*
 * UNDIAGONALIZE(row1,row2,row3,row4): permute the 32-bit lanes of row2, row3
 * and row4 in place; row1 is accepted but left untouched. After the shuffles,
 * result lane i holds source lane (i+3) mod 4 for row2, (i+2) mod 4 for row3
 * and (i+1) mod 4 for row4 -- the inverse of the permutation applied by
 * DIAGONALIZE.
 * Wrapped in do { } while (0) so that "UNDIAGONALIZE(...);" is a single
 * statement and is safe inside unbraced if/else.
 */
#define UNDIAGONALIZE(row1,row2,row3,row4) \
  do { \
    (row4) = _mm_shuffle_epi32( (row4), _MM_SHUFFLE(0,3,2,1) ); \
    (row3) = _mm_shuffle_epi32( (row3), _MM_SHUFFLE(1,0,3,2) ); \
    (row2) = _mm_shuffle_epi32( (row2), _MM_SHUFFLE(2,1,0,3) ); \
  } while (0)

#if defined(HAVE_XOP)
#include "blake2s-load-xop.h"
#elif defined(HAVE_SSE4_1)
#include "blake2s-load-sse41.h"
#else
#include "blake2s-load-sse2.h"
#endif

/*
 * ROUND(r): one full round for round index r.
 *
 * r is token-pasted into the names LOAD_MSG_<r>_1 .. LOAD_MSG_<r>_4, so it must
 * be a literal for which those macros exist (they come from the blake2s-load-*
 * header selected above). The sequence is: load/G1/load/G2 on the rows as they
 * are, DIAGONALIZE, load/G1/load/G2 again, then UNDIAGONALIZE.
 *
 * The macro uses row1..row4 and buf1..buf4, which are NOT parameters: they must
 * be declared in the scope where ROUND is expanded.
 *
 * Wrapped in do { } while (0) with no trailing line continuation, so the macro
 * ends where it appears to end and "ROUND(r);" is a single statement; callers
 * must supply the terminating semicolon.
 */
#define ROUND(r)  \
  do { \
    LOAD_MSG_ ## r ## _1(buf1); \
    G1(row1,row2,row3,row4,buf1); \
    LOAD_MSG_ ## r ## _2(buf2); \
    G2(row1,row2,row3,row4,buf2); \
    DIAGONALIZE(row1,row2,row3,row4); \
    LOAD_MSG_ ## r ## _3(buf3); \
    G1(row1,row2,row3,row4,buf3); \
    LOAD_MSG_ ## r ## _4(buf4); \
    G2(row1,row2,row3,row4,buf4); \
    UNDIAGONALIZE(row1,row2,row3,row4); \
  } while (0)

#endif