Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/lzma/src/XzCrc64Opt.c
4253 views
1
/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2
2023-12-08 : Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
#include "CpuArch.h"
7
8
#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
9
10
/*
 * Debug aid: define Z7_CRC64_DEBUG_BE to compile and exercise the big-endian
 * code path on a little-endian CPU.
 */
// #define Z7_CRC64_DEBUG_BE
#if defined(Z7_CRC64_DEBUG_BE)
  #undef MY_CPU_LE
  #define MY_CPU_BE
#endif

/* Z7_CRC64_USE_64BIT selects the variants that read and fold 64-bit words. */
#ifdef MY_CPU_64BIT
  #define Z7_CRC64_USE_64BIT
#endif

/*
 * Number of 256-entry tables used by the routines below (defaults to 12).
 * The value of Z7_CRC64_NUM_TABLES_USE must be the same as the one used
 * in XzCrc64.c.
 */
#if !defined(Z7_CRC64_NUM_TABLES)
  #define Z7_CRC64_NUM_TABLES_USE 12
#else
  #define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#endif

/* Only multiples of 4 in the range [4, 16] are supported. */
#if Z7_CRC64_NUM_TABLES_USE % 4 != 0 \
 || Z7_CRC64_NUM_TABLES_USE < 4 \
 || Z7_CRC64_NUM_TABLES_USE > 16
  #error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
33
34
35
#ifndef MY_CPU_BE
36
37
/*
 * Helper macros for the little-endian CRC64 routine.
 * They expand against the identifiers `table` and/or `p`, which therefore
 * must be in scope wherever the macros are used.
 */

/* Fold a single byte b into crc using the first 256 entries of table. */
#define CRC64_UPDATE_BYTE_2(crc, b) \
    (((crc) >> 8) ^ table[((crc) ^ (b)) & 0xFF])

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

/*
 * Q64LE(n, d): XOR of eight lookups, one per byte of the 64-bit value d.
 * The most significant byte of d indexes sub-table (n * 8 + 0) and the least
 * significant byte indexes sub-table (n * 8 + 7); each sub-table has 0x100
 * entries.
 */
#define Q64LE(n, d) \
    ( (table + 0x100 * ((n) * 8 + 0))[ (d) >> 56         ] \
    ^ (table + 0x100 * ((n) * 8 + 1))[((d) >> 48) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 2))[((d) >> 40) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 3))[((d) >> 32) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 4))[((d) >> 24) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 5))[((d) >> 16) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 6))[((d) >>  8) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 7))[ (d)        & 0xFF] )

/* R64(a): the a-th 64-bit word starting at p, read in native byte order. */
#define R64(a) (((const UInt64 *)(const void *)p)[(a)])

#else

/*
 * Q32LE(n, d): XOR of four lookups, one per byte of the 32-bit value d.
 * The most significant byte of d indexes sub-table (n * 4 + 0) and the least
 * significant byte indexes sub-table (n * 4 + 3).
 */
#define Q32LE(n, d) \
    ( (table + 0x100 * ((n) * 4 + 0))[ (d) >> 24         ] \
    ^ (table + 0x100 * ((n) * 4 + 1))[((d) >> 16) & 0xFF] \
    ^ (table + 0x100 * ((n) * 4 + 2))[((d) >>  8) & 0xFF] \
    ^ (table + 0x100 * ((n) * 4 + 3))[ (d)        & 0xFF] )

/* R32(a): the a-th 32-bit word starting at p, read in native byte order. */
#define R32(a) (((const UInt32 *)(const void *)p)[(a)])

#endif

/* Signature of XzCrc64UpdateT<step>; `step` is pasted into the name. */
#define CRC64_FUNC_PRE_LE2(step) \
    UInt64 Z7_FASTCALL XzCrc64UpdateT ## step ( \
        UInt64 v, const void *data, size_t size, const UInt64 *table)

/*
 * Emits a prototype followed by the header of the definition.
 * The extra macro level lets `step` be macro-expanded before it is pasted.
 */
#define CRC64_FUNC_PRE_LE(step) \
    CRC64_FUNC_PRE_LE2(step); \
    CRC64_FUNC_PRE_LE2(step)
72
73
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
74
{
75
const Byte *p = (const Byte *)data;
76
const Byte *lim;
77
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
78
v = CRC64_UPDATE_BYTE_2(v, *p);
79
lim = p + size;
80
if (size >= Z7_CRC64_NUM_TABLES_USE)
81
{
82
lim -= Z7_CRC64_NUM_TABLES_USE;
83
do
84
{
85
#if Z7_CRC64_NUM_TABLES_USE == 4
86
const UInt32 d = (UInt32)v ^ R32(0);
87
v = (v >> 32) ^ Q32LE(0, d);
88
#elif Z7_CRC64_NUM_TABLES_USE == 8
89
#ifdef Z7_CRC64_USE_64BIT
90
v ^= R64(0);
91
v = Q64LE(0, v);
92
#else
93
UInt32 v0, v1;
94
v0 = (UInt32)v ^ R32(0);
95
v1 = (UInt32)(v >> 32) ^ R32(1);
96
v = Q32LE(1, v0) ^ Q32LE(0, v1);
97
#endif
98
#elif Z7_CRC64_NUM_TABLES_USE == 12
99
UInt32 w;
100
UInt32 v0, v1;
101
v0 = (UInt32)v ^ R32(0);
102
v1 = (UInt32)(v >> 32) ^ R32(1);
103
w = R32(2);
104
v = Q32LE(0, w);
105
v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
106
#elif Z7_CRC64_NUM_TABLES_USE == 16
107
#ifdef Z7_CRC64_USE_64BIT
108
UInt64 w;
109
UInt64 x;
110
w = R64(1); x = Q64LE(0, w);
111
v ^= R64(0); v = x ^ Q64LE(1, v);
112
#else
113
UInt32 v0, v1;
114
UInt32 r0, r1;
115
v0 = (UInt32)v ^ R32(0);
116
v1 = (UInt32)(v >> 32) ^ R32(1);
117
r0 = R32(2);
118
r1 = R32(3);
119
v = Q32LE(1, r0) ^ Q32LE(0, r1);
120
v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
121
#endif
122
#else
123
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
124
#endif
125
p += Z7_CRC64_NUM_TABLES_USE;
126
}
127
while (p <= lim);
128
lim += Z7_CRC64_NUM_TABLES_USE;
129
}
130
for (; p < lim; p++)
131
v = CRC64_UPDATE_BYTE_2(v, *p);
132
return v;
133
}
134
135
#undef CRC64_UPDATE_BYTE_2
136
#undef R32
137
#undef R64
138
#undef Q32LE
139
#undef Q64LE
140
#undef CRC64_FUNC_PRE_LE
141
#undef CRC64_FUNC_PRE_LE2
142
143
#endif
144
145
146
147
148
#ifndef MY_CPU_LE
149
150
/*
 * Helper macros for the big-endian CRC64 routine.
 * They expand against the identifiers `table` and/or `p`, which therefore
 * must be in scope wherever the macros are used.
 */

/* Fold a single byte b into crc; the top byte of crc selects the entry. */
#define CRC64_UPDATE_BYTE_2_BE(crc, b) \
    (((crc) << 8) ^ table[((crc) >> 56) ^ (b)])

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

/*
 * Q64BE(n, d): XOR of eight lookups, one per byte of the 64-bit value d.
 * The least significant byte of d indexes sub-table (n * 8 + 0) and the most
 * significant byte indexes sub-table (n * 8 + 7); each sub-table has 0x100
 * entries.
 */
#define Q64BE(n, d) \
    ( (table + 0x100 * ((n) * 8 + 7))[ (d) >> 56         ] \
    ^ (table + 0x100 * ((n) * 8 + 6))[((d) >> 48) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 5))[((d) >> 40) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 4))[((d) >> 32) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 3))[((d) >> 24) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 2))[((d) >> 16) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 1))[((d) >>  8) & 0xFF] \
    ^ (table + 0x100 * ((n) * 8 + 0))[(Byte)(d)] )

/*
 * R64BE(a): the a-th 64-bit word starting at p.
 * In the Z7_CRC64_DEBUG_BE build the word is fetched through GetBe64a();
 * otherwise it is a plain native-order load.
 */
#ifdef Z7_CRC64_DEBUG_BE
  #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a))
#else
  #define R64BE(a) (((const UInt64 *)(const void *)p)[(a)])
#endif

#else

/*
 * Q32BE(n, d): XOR of four lookups, one per byte of the 32-bit value d.
 * The least significant byte of d indexes sub-table (n * 4 + 0) and the most
 * significant byte indexes sub-table (n * 4 + 3).
 */
#define Q32BE(n, d) \
    ( (table + 0x100 * ((n) * 4 + 3))[ (d) >> 24         ] \
    ^ (table + 0x100 * ((n) * 4 + 2))[((d) >> 16) & 0xFF] \
    ^ (table + 0x100 * ((n) * 4 + 1))[((d) >>  8) & 0xFF] \
    ^ (table + 0x100 * ((n) * 4 + 0))[(Byte)(d)] )

/*
 * R32BE(a): the a-th 32-bit word starting at p.
 * In the Z7_CRC64_DEBUG_BE build the word is fetched through GetBe32a();
 * otherwise it is a plain native-order load.
 */
#ifdef Z7_CRC64_DEBUG_BE
  #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
  #define R32BE(a) (((const UInt32 *)(const void *)p)[(a)])
#endif

#endif

/* Signature of XzCrc64UpdateBeT<step>; `step` is pasted into the name. */
#define CRC64_FUNC_PRE_BE2(step) \
    UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step ( \
        UInt64 v, const void *data, size_t size, const UInt64 *table)

/*
 * Emits a prototype followed by the header of the definition.
 * The extra macro level lets `step` be macro-expanded before it is pasted.
 */
#define CRC64_FUNC_PRE_BE(step) \
    CRC64_FUNC_PRE_BE2(step); \
    CRC64_FUNC_PRE_BE2(step)
192
193
CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
194
{
195
const Byte *p = (const Byte *)data;
196
const Byte *lim;
197
v = Z7_BSWAP64(v);
198
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
199
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
200
lim = p + size;
201
if (size >= Z7_CRC64_NUM_TABLES_USE)
202
{
203
lim -= Z7_CRC64_NUM_TABLES_USE;
204
do
205
{
206
#if Z7_CRC64_NUM_TABLES_USE == 4
207
const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
208
v = (v << 32) ^ Q32BE(0, d);
209
#elif Z7_CRC64_NUM_TABLES_USE == 12
210
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
211
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
212
const UInt32 w = R32BE(2);
213
v = Q32BE(0, w);
214
v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
215
216
#elif Z7_CRC64_NUM_TABLES_USE == 8
217
#ifdef Z7_CRC64_USE_64BIT
218
v ^= R64BE(0);
219
v = Q64BE(0, v);
220
#else
221
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
222
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
223
v = Q32BE(1, d1) ^ Q32BE(0, d0);
224
#endif
225
#elif Z7_CRC64_NUM_TABLES_USE == 16
226
#ifdef Z7_CRC64_USE_64BIT
227
const UInt64 w = R64BE(1);
228
v ^= R64BE(0);
229
v = Q64BE(0, w) ^ Q64BE(1, v);
230
#else
231
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
232
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
233
const UInt32 w1 = R32BE(2);
234
const UInt32 w0 = R32BE(3);
235
v = Q32BE(1, w1) ^ Q32BE(0, w0);
236
v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
237
#endif
238
#elif
239
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
240
#endif
241
p += Z7_CRC64_NUM_TABLES_USE;
242
}
243
while (p <= lim);
244
lim += Z7_CRC64_NUM_TABLES_USE;
245
}
246
for (; p < lim; p++)
247
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
248
return Z7_BSWAP64(v);
249
}
250
251
#undef CRC64_UPDATE_BYTE_2_BE
252
#undef R32BE
253
#undef R64BE
254
#undef Q32BE
255
#undef Q64BE
256
#undef CRC64_FUNC_PRE_BE
257
#undef CRC64_FUNC_PRE_BE2
258
259
#endif
260
/* Remove the table-count macro that was defined earlier in this file. */
#undef Z7_CRC64_NUM_TABLES_USE
261
#endif
262
263