Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
litecoincash-project
GitHub Repository: litecoincash-project/cpuminer-multi
Path: blob/master/sha3/sph_types.h
1298 views
1
/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
2
/**
3
* Basic type definitions.
4
*
5
* This header file defines the generic integer types that will be used
6
* for the implementation of hash functions; it also contains helper
7
* functions which encode and decode multi-byte integer values, using
8
* either little-endian or big-endian conventions.
9
*
10
* This file contains a compile-time test on the size of a byte
11
* (the <code>unsigned char</code> C type). If bytes are not octets,
12
* i.e. if they do not have a size of exactly 8 bits, then compilation
13
* is aborted. Architectures where bytes are not octets are relatively
14
* rare, even in the embedded devices market. We forbid non-octet bytes
15
* because there is no clear convention on how octet streams are encoded
16
* on such systems.
17
*
18
* ==========================(LICENSE BEGIN)============================
19
*
20
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
21
*
22
* Permission is hereby granted, free of charge, to any person obtaining
23
* a copy of this software and associated documentation files (the
24
* "Software"), to deal in the Software without restriction, including
25
* without limitation the rights to use, copy, modify, merge, publish,
26
* distribute, sublicense, and/or sell copies of the Software, and to
27
* permit persons to whom the Software is furnished to do so, subject to
28
* the following conditions:
29
*
30
* The above copyright notice and this permission notice shall be
31
* included in all copies or substantial portions of the Software.
32
*
33
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
36
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
37
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
38
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40
*
41
* ===========================(LICENSE END)=============================
42
*
43
* @file sph_types.h
44
* @author Thomas Pornin <[email protected]>
45
*/
46
47
#ifndef SPH_TYPES_H__
48
#define SPH_TYPES_H__
49
50
#include <limits.h>
51
52
/*
53
* All our I/O functions are defined over octet streams. We do not know
54
* how to handle input data if bytes are not octets.
55
*/
56
#if CHAR_BIT != 8
57
#error This code requires 8-bit bytes
58
#endif
59
60
/* ============= BEGIN documentation block for Doxygen ============ */
61
62
#ifdef DOXYGEN_IGNORE
63
64
/** @mainpage sphlib C code documentation
65
*
66
* @section overview Overview
67
*
68
* <code>sphlib</code> is a library which contains implementations of
69
* various cryptographic hash functions. These pages have been generated
70
* with <a href="http://www.doxygen.org/index.html">doxygen</a> and
71
* document the API for the C implementations.
72
*
73
* The API is described in appropriate header files, which are available
74
* in the "Files" section. Each hash function family has its own header,
75
* whose name begins with <code>"sph_"</code> and contains the family
76
* name. For instance, the API for the RIPEMD hash functions is available
77
* in the header file <code>sph_ripemd.h</code>.
78
*
79
* @section principles API structure and conventions
80
*
81
* @subsection io Input/output conventions
82
*
83
* In all generality, hash functions operate over strings of bits.
84
* Individual bits are rarely encountered in C programming or actual
85
* communication protocols; most protocols converge on the ubiquitous
86
* "octet" which is a group of eight bits. Data is thus expressed as a
87
* stream of octets. The C programming language contains the notion of a
88
* "byte", which is a data unit managed under the type <code>"unsigned
89
* char"</code>. The C standard prescribes that a byte should hold at
90
* least eight bits, but possibly more. Most modern architectures, even
91
* in the embedded world, feature eight-bit bytes, i.e. map bytes to
92
* octets.
93
*
94
* Nevertheless, for some of the implemented hash functions, an extra
95
* API has been added, which allows the input of arbitrary sequences of
96
* bits: when the computation is about to be closed, 1 to 7 extra bits
97
* can be added. The functions for which this API is implemented include
98
* the SHA-2 functions and all SHA-3 candidates.
99
*
100
 * <code>sphlib</code> defines hash functions which may hash octet streams,
101
* i.e. streams of bits where the number of bits is a multiple of eight.
102
* The data input functions in the <code>sphlib</code> API expect data
103
* as anonymous pointers (<code>"const void *"</code>) with a length
104
* (of type <code>"size_t"</code>) which gives the input data chunk length
105
* in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
106
* header contains a compile-time test which prevents compilation on
107
* architectures where this property is not met.
108
*
109
* The hash function output is also converted into bytes. All currently
110
* implemented hash functions have an output width which is a multiple of
111
* eight, and this is likely to remain true for new designs.
112
*
113
 * Most hash functions internally convert input data into 32-bit or 64-bit
114
* words, using either little-endian or big-endian conversion. The hash
115
* output also often consists of such words, which are encoded into output
116
* bytes with a similar endianness convention. Some hash functions have
117
* been only loosely specified on that subject; when necessary,
118
* <code>sphlib</code> has been tested against published "reference"
119
* implementations in order to use the same conventions.
120
*
121
* @subsection shortname Function short name
122
*
123
* Each implemented hash function has a "short name" which is used
124
* internally to derive the identifiers for the functions and context
125
* structures which the function uses. For instance, MD5 has the short
126
* name <code>"md5"</code>. Short names are listed in the next section,
127
* for the implemented hash functions. In subsequent sections, the
128
* short name will be assumed to be <code>"XXX"</code>: replace with the
129
* actual hash function name to get the C identifier.
130
*
131
* Note: some functions within the same family share the same core
132
* elements, such as update function or context structure. Correspondingly,
133
* some of the defined types or functions may actually be macros which
134
* transparently evaluate to another type or function name.
135
*
136
* @subsection context Context structure
137
*
138
 * Each implemented hash function has its own context structure, available
139
* under the type name <code>"sph_XXX_context"</code> for the hash function
140
* with short name <code>"XXX"</code>. This structure holds all needed
141
* state for a running hash computation.
142
*
143
* The contents of these structures are meant to be opaque, and private
144
* to the implementation. However, these contents are specified in the
145
* header files so that application code which uses <code>sphlib</code>
146
* may access the size of those structures.
147
*
148
* The caller is responsible for allocating the context structure,
149
* whether by dynamic allocation (<code>malloc()</code> or equivalent),
150
* static allocation (a global permanent variable), as an automatic
151
* variable ("on the stack"), or by any other mean which ensures proper
152
* structure alignment. <code>sphlib</code> code performs no dynamic
153
* allocation by itself.
154
*
155
* The context must be initialized before use, using the
156
* <code>sph_XXX_init()</code> function. This function sets the context
157
* state to proper initial values for hashing.
158
*
159
* Since all state data is contained within the context structure,
160
* <code>sphlib</code> is thread-safe and reentrant: several hash
161
* computations may be performed in parallel, provided that they do not
162
* operate on the same context. Moreover, a running computation can be
163
* cloned by copying the context (with a simple <code>memcpy()</code>):
164
 * the context and its clone are then independent and may be updated
165
* with new data and/or closed without interfering with each other.
166
* Similarly, a context structure can be moved in memory at will:
167
* context structures contain no pointer, in particular no pointer to
168
* themselves.
169
*
170
* @subsection dataio Data input
171
*
172
 * Hashed data is input with the <code>sph_XXX()</code> function, which
173
* takes as parameters a pointer to the context, a pointer to the data
174
* to hash, and the number of data bytes to hash. The context is updated
175
* with the new data.
176
*
177
* Data can be input in one or several calls, with arbitrary input lengths.
178
* However, it is best, performance wise, to input data by relatively big
179
* chunks (say a few kilobytes), because this allows <code>sphlib</code> to
180
* optimize things and avoid internal copying.
181
*
182
* When all data has been input, the context can be closed with
183
* <code>sph_XXX_close()</code>. The hash output is computed and written
184
* into the provided buffer. The caller must take care to provide a
185
* buffer of appropriate length; e.g., when using SHA-1, the output is
186
* a 20-byte word, therefore the output buffer must be at least 20-byte
187
* long.
188
*
189
* For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
190
* function can be used instead of <code>sph_XXX_close()</code>. This
191
* function can take a few extra <strong>bits</strong> to be added at
192
* the end of the input message. This allows hashing messages with a
193
* bit length which is not a multiple of 8. The extra bits are provided
194
* as an unsigned integer value, and a bit count. The bit count must be
195
* between 0 and 7, inclusive. The extra bits are provided as bits 7 to
196
 * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
197
* For instance, to add three bits of value 1, 1 and 0, the unsigned
198
* integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
199
* will be 3.
200
*
201
* The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
202
* it evaluates to the function output size, expressed in bits. For instance,
203
* <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
204
*
205
* When closed, the context is automatically reinitialized and can be
206
* immediately used for another computation. It is not necessary to call
207
* <code>sph_XXX_init()</code> after a close. Note that
208
* <code>sph_XXX_init()</code> can still be called to "reset" a context,
209
* i.e. forget previously input data, and get back to the initial state.
210
*
211
* @subsection alignment Data alignment
212
*
213
* "Alignment" is a property of data, which is said to be "properly
214
* aligned" when its emplacement in memory is such that the data can
215
* be optimally read by full words. This depends on the type of access;
216
* basically, some hash functions will read data by 32-bit or 64-bit
217
* words. <code>sphlib</code> does not mandate such alignment for input
218
* data, but using aligned data can substantially improve performance.
219
*
220
* As a rule, it is best to input data by chunks whose length (in bytes)
221
* is a multiple of eight, and which begins at "generally aligned"
222
* addresses, such as the base address returned by a call to
223
* <code>malloc()</code>.
224
*
225
* @section functions Implemented functions
226
*
227
* We give here the list of implemented functions. They are grouped by
228
* family; to each family corresponds a specific header file. Each
229
* individual function has its associated "short name". Please refer to
230
* the documentation for that header file to get details on the hash
231
* function denomination and provenance.
232
*
233
* Note: the functions marked with a '(64)' in the list below are
234
* available only if the C compiler provides an integer type of length
235
* 64 bits or more. Such a type is mandatory in the latest C standard
236
* (ISO 9899:1999, aka "C99") and is present in several older compilers
237
* as well, so chances are that such a type is available.
238
*
239
* - HAVAL family: file <code>sph_haval.h</code>
240
* - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
241
* - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
242
* - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
243
* - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
244
* - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
245
* - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
246
* - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
247
* - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
248
* - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
249
* - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
250
* - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
251
* - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
252
* - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
253
* - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
254
* - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
255
* - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
256
* - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
257
* - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
258
* - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
259
* - RadioGatun family: file <code>sph_radiogatun.h</code>
260
* - RadioGatun[32]: short name: <code>radiogatun32</code>
261
* - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
262
* - RIPEMD family: file <code>sph_ripemd.h</code>
263
* - RIPEMD: short name: <code>ripemd</code>
264
* - RIPEMD-128: short name: <code>ripemd128</code>
265
* - RIPEMD-160: short name: <code>ripemd160</code>
266
* - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
267
* - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
268
* - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
269
* - SHA-224: short name: <code>sha224</code>
270
* - SHA-256: short name: <code>sha256</code>
271
* - SHA-384: short name: <code>sha384</code> (64)
272
* - SHA-512: short name: <code>sha512</code> (64)
273
* - Tiger family: file <code>sph_tiger.h</code>
274
* - Tiger: short name: <code>tiger</code> (64)
275
* - Tiger2: short name: <code>tiger2</code> (64)
276
* - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
277
* - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
278
* - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
279
* - WHIRLPOOL: short name: <code>whirlpool</code> (64)
280
*
281
* The fourteen second-round SHA-3 candidates are also implemented;
282
* when applicable, the implementations follow the "final" specifications
283
* as published for the third round of the SHA-3 competition (BLAKE,
284
* Groestl, JH, Keccak and Skein have been tweaked for third round).
285
*
286
* - BLAKE family: file <code>sph_blake.h</code>
287
* - BLAKE-224: short name: <code>blake224</code>
288
* - BLAKE-256: short name: <code>blake256</code>
289
* - BLAKE-384: short name: <code>blake384</code>
290
* - BLAKE-512: short name: <code>blake512</code>
291
* - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
292
* - BMW-224: short name: <code>bmw224</code>
293
* - BMW-256: short name: <code>bmw256</code>
294
* - BMW-384: short name: <code>bmw384</code> (64)
295
* - BMW-512: short name: <code>bmw512</code> (64)
296
* - CubeHash family: file <code>sph_cubehash.h</code> (specified as
297
* CubeHash16/32 in the CubeHash specification)
298
* - CubeHash-224: short name: <code>cubehash224</code>
299
* - CubeHash-256: short name: <code>cubehash256</code>
300
* - CubeHash-384: short name: <code>cubehash384</code>
301
* - CubeHash-512: short name: <code>cubehash512</code>
302
* - ECHO family: file <code>sph_echo.h</code>
303
* - ECHO-224: short name: <code>echo224</code>
304
* - ECHO-256: short name: <code>echo256</code>
305
* - ECHO-384: short name: <code>echo384</code>
306
* - ECHO-512: short name: <code>echo512</code>
307
* - Fugue family: file <code>sph_fugue.h</code>
308
* - Fugue-224: short name: <code>fugue224</code>
309
* - Fugue-256: short name: <code>fugue256</code>
310
* - Fugue-384: short name: <code>fugue384</code>
311
* - Fugue-512: short name: <code>fugue512</code>
312
* - Groestl family: file <code>sph_groestl.h</code>
313
* - Groestl-224: short name: <code>groestl224</code>
314
* - Groestl-256: short name: <code>groestl256</code>
315
* - Groestl-384: short name: <code>groestl384</code>
316
* - Groestl-512: short name: <code>groestl512</code>
317
* - Hamsi family: file <code>sph_hamsi.h</code>
318
* - Hamsi-224: short name: <code>hamsi224</code>
319
* - Hamsi-256: short name: <code>hamsi256</code>
320
* - Hamsi-384: short name: <code>hamsi384</code>
321
* - Hamsi-512: short name: <code>hamsi512</code>
322
* - JH family: file <code>sph_jh.h</code>
323
* - JH-224: short name: <code>jh224</code>
324
* - JH-256: short name: <code>jh256</code>
325
* - JH-384: short name: <code>jh384</code>
326
* - JH-512: short name: <code>jh512</code>
327
* - Keccak family: file <code>sph_keccak.h</code>
328
* - Keccak-224: short name: <code>keccak224</code>
329
* - Keccak-256: short name: <code>keccak256</code>
330
* - Keccak-384: short name: <code>keccak384</code>
331
* - Keccak-512: short name: <code>keccak512</code>
332
* - Luffa family: file <code>sph_luffa.h</code>
333
* - Luffa-224: short name: <code>luffa224</code>
334
* - Luffa-256: short name: <code>luffa256</code>
335
* - Luffa-384: short name: <code>luffa384</code>
336
* - Luffa-512: short name: <code>luffa512</code>
337
* - Shabal family: file <code>sph_shabal.h</code>
338
* - Shabal-192: short name: <code>shabal192</code>
339
* - Shabal-224: short name: <code>shabal224</code>
340
* - Shabal-256: short name: <code>shabal256</code>
341
* - Shabal-384: short name: <code>shabal384</code>
342
* - Shabal-512: short name: <code>shabal512</code>
343
* - SHAvite-3 family: file <code>sph_shavite.h</code>
344
* - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
345
 * short name: <code>shavite224</code>
346
* - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
347
 * short name: <code>shavite256</code>
348
* - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
349
 * short name: <code>shavite384</code>
350
* - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
351
 * short name: <code>shavite512</code>
352
* - SIMD family: file <code>sph_simd.h</code>
353
* - SIMD-224: short name: <code>simd224</code>
354
* - SIMD-256: short name: <code>simd256</code>
355
* - SIMD-384: short name: <code>simd384</code>
356
* - SIMD-512: short name: <code>simd512</code>
357
* - Skein family: file <code>sph_skein.h</code>
358
* - Skein-224 (nominally specified as Skein-512-224): short name:
359
* <code>skein224</code> (64)
360
* - Skein-256 (nominally specified as Skein-512-256): short name:
361
* <code>skein256</code> (64)
362
* - Skein-384 (nominally specified as Skein-512-384): short name:
363
* <code>skein384</code> (64)
364
* - Skein-512 (nominally specified as Skein-512-512): short name:
365
* <code>skein512</code> (64)
366
*
367
* For the second-round SHA-3 candidates, the functions are as specified
368
* for round 2, i.e. with the "tweaks" that some candidates added
369
* between round 1 and round 2. Also, some of the submitted packages for
370
* round 2 contained errors, in the specification, reference code, or
371
* both. <code>sphlib</code> implements the corrected versions.
372
*/
373
374
/** @hideinitializer
375
* Unsigned integer type whose length is at least 32 bits; on most
376
* architectures, it will have a width of exactly 32 bits. Unsigned C
377
* types implement arithmetics modulo a power of 2; use the
378
* <code>SPH_T32()</code> macro to ensure that the value is truncated
379
* to exactly 32 bits. Unless otherwise specified, all macros and
380
* functions which accept <code>sph_u32</code> values assume that these
381
* values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
382
* where <code>sph_u32</code> is larger than that.
383
*/
384
typedef __arch_dependant__ sph_u32;
385
386
/** @hideinitializer
387
* Signed integer type corresponding to <code>sph_u32</code>; it has
388
* width 32 bits or more.
389
*/
390
typedef __arch_dependant__ sph_s32;
391
392
/** @hideinitializer
393
* Unsigned integer type whose length is at least 64 bits; on most
394
* architectures which feature such a type, it will have a width of
395
* exactly 64 bits. C99-compliant platform will have this type; it
396
* is also defined when the GNU compiler (gcc) is used, and on
397
* platforms where <code>unsigned long</code> is large enough. If this
398
* type is not available, then some hash functions which depends on
399
* a 64-bit type will not be available (most notably SHA-384, SHA-512,
400
* Tiger and WHIRLPOOL).
401
*/
402
typedef __arch_dependant__ sph_u64;
403
404
/** @hideinitializer
405
* Signed integer type corresponding to <code>sph_u64</code>; it has
406
* width 64 bits or more.
407
*/
408
typedef __arch_dependant__ sph_s64;
409
410
/**
411
* This macro expands the token <code>x</code> into a suitable
412
* constant expression of type <code>sph_u32</code>. Depending on
413
* how this type is defined, a suffix such as <code>UL</code> may
414
* be appended to the argument.
415
*
416
* @param x the token to expand into a suitable constant expression
417
*/
418
#define SPH_C32(x)
419
420
/**
421
* Truncate a 32-bit value to exactly 32 bits. On most systems, this is
422
* a no-op, recognized as such by the compiler.
423
*
424
* @param x the value to truncate (of type <code>sph_u32</code>)
425
*/
426
#define SPH_T32(x)
427
428
/**
429
* Rotate a 32-bit value by a number of bits to the left. The rotate
430
* count must reside between 1 and 31. This macro assumes that its
431
* first argument fits in 32 bits (no extra bit allowed on machines where
432
* <code>sph_u32</code> is wider); both arguments may be evaluated
433
* several times.
434
*
435
* @param x the value to rotate (of type <code>sph_u32</code>)
436
* @param n the rotation count (between 1 and 31, inclusive)
437
*/
438
#define SPH_ROTL32(x, n)
439
440
/**
441
 * Rotate a 32-bit value by a number of bits to the right. The rotate
442
* count must reside between 1 and 31. This macro assumes that its
443
* first argument fits in 32 bits (no extra bit allowed on machines where
444
* <code>sph_u32</code> is wider); both arguments may be evaluated
445
* several times.
446
*
447
* @param x the value to rotate (of type <code>sph_u32</code>)
448
* @param n the rotation count (between 1 and 31, inclusive)
449
*/
450
#define SPH_ROTR32(x, n)
451
452
/**
453
* This macro is defined on systems for which a 64-bit type has been
454
* detected, and is used for <code>sph_u64</code>.
455
*/
456
#define SPH_64
457
458
/**
459
 * This macro is defined on systems for which the "native" integer size is
460
* 64 bits (64-bit values fit in one register).
461
*/
462
#define SPH_64_TRUE
463
464
/**
465
* This macro expands the token <code>x</code> into a suitable
466
* constant expression of type <code>sph_u64</code>. Depending on
467
* how this type is defined, a suffix such as <code>ULL</code> may
468
* be appended to the argument. This macro is defined only if a
469
* 64-bit type was detected and used for <code>sph_u64</code>.
470
*
471
* @param x the token to expand into a suitable constant expression
472
*/
473
#define SPH_C64(x)
474
475
/**
476
* Truncate a 64-bit value to exactly 64 bits. On most systems, this is
477
* a no-op, recognized as such by the compiler. This macro is defined only
478
* if a 64-bit type was detected and used for <code>sph_u64</code>.
479
*
480
* @param x the value to truncate (of type <code>sph_u64</code>)
481
*/
482
#define SPH_T64(x)
483
484
/**
485
* Rotate a 64-bit value by a number of bits to the left. The rotate
486
* count must reside between 1 and 63. This macro assumes that its
487
* first argument fits in 64 bits (no extra bit allowed on machines where
488
* <code>sph_u64</code> is wider); both arguments may be evaluated
489
* several times. This macro is defined only if a 64-bit type was detected
490
* and used for <code>sph_u64</code>.
491
*
492
* @param x the value to rotate (of type <code>sph_u64</code>)
493
* @param n the rotation count (between 1 and 63, inclusive)
494
*/
495
#define SPH_ROTL64(x, n)
496
497
/**
498
 * Rotate a 64-bit value by a number of bits to the right. The rotate
499
* count must reside between 1 and 63. This macro assumes that its
500
* first argument fits in 64 bits (no extra bit allowed on machines where
501
* <code>sph_u64</code> is wider); both arguments may be evaluated
502
* several times. This macro is defined only if a 64-bit type was detected
503
* and used for <code>sph_u64</code>.
504
*
505
* @param x the value to rotate (of type <code>sph_u64</code>)
506
* @param n the rotation count (between 1 and 63, inclusive)
507
*/
508
#define SPH_ROTR64(x, n)
509
510
/**
511
* This macro evaluates to <code>inline</code> or an equivalent construction,
512
* if available on the compilation platform, or to nothing otherwise. This
513
* is used to declare inline functions, for which the compiler should
514
* endeavour to include the code directly in the caller. Inline functions
515
* are typically defined in header files as replacement for macros.
516
*/
517
#define SPH_INLINE
518
519
/**
520
* This macro is defined if the platform has been detected as using
521
* little-endian convention. This implies that the <code>sph_u32</code>
522
* type (and the <code>sph_u64</code> type also, if it is defined) has
523
* an exact width (i.e. exactly 32-bit, respectively 64-bit).
524
*/
525
#define SPH_LITTLE_ENDIAN
526
527
/**
528
* This macro is defined if the platform has been detected as using
529
* big-endian convention. This implies that the <code>sph_u32</code>
530
* type (and the <code>sph_u64</code> type also, if it is defined) has
531
* an exact width (i.e. exactly 32-bit, respectively 64-bit).
532
*/
533
#define SPH_BIG_ENDIAN
534
535
/**
536
* This macro is defined if 32-bit words (and 64-bit words, if defined)
537
* can be read from and written to memory efficiently in little-endian
538
* convention. This is the case for little-endian platforms, and also
539
* for the big-endian platforms which have special little-endian access
540
* opcodes (e.g. Ultrasparc).
541
*/
542
#define SPH_LITTLE_FAST
543
544
/**
545
* This macro is defined if 32-bit words (and 64-bit words, if defined)
546
* can be read from and written to memory efficiently in big-endian
547
 * convention. This is the case for big-endian platforms, and also
548
* for the little-endian platforms which have special big-endian access
549
* opcodes.
550
*/
551
#define SPH_BIG_FAST
552
553
/**
554
* On some platforms, this macro is defined to an unsigned integer type
555
* into which pointer values may be cast. The resulting value can then
556
* be tested for being a multiple of 2, 4 or 8, indicating an aligned
557
* pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
558
*/
559
#define SPH_UPTR
560
561
/**
562
* When defined, this macro indicates that unaligned memory accesses
563
 * are possible with only a minor penalty, and thus should be preferred
564
* over strategies which first copy data to an aligned buffer.
565
*/
566
#define SPH_UNALIGNED
567
568
/**
569
* Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
570
* <code>0x78563412</code>). This is an inline function which resorts
571
* to inline assembly on some platforms, for better performance.
572
*
573
* @param x the 32-bit value to byte-swap
574
* @return the byte-swapped value
575
*/
576
static inline sph_u32 sph_bswap32(sph_u32 x);
577
578
/**
579
* Byte-swap a 64-bit word. This is an inline function which resorts
580
* to inline assembly on some platforms, for better performance. This
581
* function is defined only if a suitable 64-bit type was found for
582
* <code>sph_u64</code>
583
*
584
* @param x the 64-bit value to byte-swap
585
* @return the byte-swapped value
586
*/
587
static inline sph_u64 sph_bswap64(sph_u64 x);
588
589
/**
590
* Decode a 16-bit unsigned value from memory, in little-endian convention
591
* (least significant byte comes first).
592
*
593
* @param src the source address
594
* @return the decoded value
595
*/
596
static inline unsigned sph_dec16le(const void *src);
597
598
/**
599
* Encode a 16-bit unsigned value into memory, in little-endian convention
600
* (least significant byte comes first).
601
*
602
* @param dst the destination buffer
603
* @param val the value to encode
604
*/
605
static inline void sph_enc16le(void *dst, unsigned val);
606
607
/**
608
* Decode a 16-bit unsigned value from memory, in big-endian convention
609
* (most significant byte comes first).
610
*
611
* @param src the source address
612
* @return the decoded value
613
*/
614
static inline unsigned sph_dec16be(const void *src);
615
616
/**
617
* Encode a 16-bit unsigned value into memory, in big-endian convention
618
* (most significant byte comes first).
619
*
620
* @param dst the destination buffer
621
* @param val the value to encode
622
*/
623
static inline void sph_enc16be(void *dst, unsigned val);
624
625
/**
626
* Decode a 32-bit unsigned value from memory, in little-endian convention
627
* (least significant byte comes first).
628
*
629
* @param src the source address
630
* @return the decoded value
631
*/
632
static inline sph_u32 sph_dec32le(const void *src);
633
634
/**
635
* Decode a 32-bit unsigned value from memory, in little-endian convention
636
* (least significant byte comes first). This function assumes that the
637
* source address is suitably aligned for a direct access, if the platform
638
* supports such things; it can thus be marginally faster than the generic
639
* <code>sph_dec32le()</code> function.
640
*
641
* @param src the source address
642
* @return the decoded value
643
*/
644
static inline sph_u32 sph_dec32le_aligned(const void *src);
645
646
/**
647
* Encode a 32-bit unsigned value into memory, in little-endian convention
648
* (least significant byte comes first).
649
*
650
* @param dst the destination buffer
651
* @param val the value to encode
652
*/
653
static inline void sph_enc32le(void *dst, sph_u32 val);
654
655
/**
656
* Encode a 32-bit unsigned value into memory, in little-endian convention
657
* (least significant byte comes first). This function assumes that the
658
* destination address is suitably aligned for a direct access, if the
659
* platform supports such things; it can thus be marginally faster than
660
* the generic <code>sph_enc32le()</code> function.
661
*
662
* @param dst the destination buffer
663
* @param val the value to encode
664
*/
665
static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
666
667
/**
668
* Decode a 32-bit unsigned value from memory, in big-endian convention
669
* (most significant byte comes first).
670
*
671
* @param src the source address
672
* @return the decoded value
673
*/
674
static inline sph_u32 sph_dec32be(const void *src);
675
676
/**
677
* Decode a 32-bit unsigned value from memory, in big-endian convention
678
* (most significant byte comes first). This function assumes that the
679
* source address is suitably aligned for a direct access, if the platform
680
* supports such things; it can thus be marginally faster than the generic
681
* <code>sph_dec32be()</code> function.
682
*
683
* @param src the source address
684
* @return the decoded value
685
*/
686
static inline sph_u32 sph_dec32be_aligned(const void *src);
687
688
/**
689
* Encode a 32-bit unsigned value into memory, in big-endian convention
690
* (most significant byte comes first).
691
*
692
* @param dst the destination buffer
693
* @param val the value to encode
694
*/
695
static inline void sph_enc32be(void *dst, sph_u32 val);
696
697
/**
698
* Encode a 32-bit unsigned value into memory, in big-endian convention
699
* (most significant byte comes first). This function assumes that the
700
* destination address is suitably aligned for a direct access, if the
701
* platform supports such things; it can thus be marginally faster than
702
* the generic <code>sph_enc32be()</code> function.
703
*
704
* @param dst the destination buffer
705
* @param val the value to encode
706
*/
707
static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
708
709
/**
710
* Decode a 64-bit unsigned value from memory, in little-endian convention
711
* (least significant byte comes first). This function is defined only
712
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
713
*
714
* @param src the source address
715
* @return the decoded value
716
*/
717
static inline sph_u64 sph_dec64le(const void *src);
718
719
/**
720
* Decode a 64-bit unsigned value from memory, in little-endian convention
721
* (least significant byte comes first). This function assumes that the
722
* source address is suitably aligned for a direct access, if the platform
723
* supports such things; it can thus be marginally faster than the generic
724
* <code>sph_dec64le()</code> function. This function is defined only
725
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
726
*
727
* @param src the source address
728
* @return the decoded value
729
*/
730
static inline sph_u64 sph_dec64le_aligned(const void *src);
731
732
/**
733
* Encode a 64-bit unsigned value into memory, in little-endian convention
734
* (least significant byte comes first). This function is defined only
735
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
736
*
737
* @param dst the destination buffer
738
* @param val the value to encode
739
*/
740
static inline void sph_enc64le(void *dst, sph_u64 val);
741
742
/**
743
* Encode a 64-bit unsigned value into memory, in little-endian convention
744
* (least significant byte comes first). This function assumes that the
745
* destination address is suitably aligned for a direct access, if the
746
* platform supports such things; it can thus be marginally faster than
747
* the generic <code>sph_enc64le()</code> function. This function is defined
748
* only if a suitable 64-bit type was detected and used for
749
* <code>sph_u64</code>.
750
*
751
* @param dst the destination buffer
752
* @param val the value to encode
753
*/
754
static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
755
756
/**
757
* Decode a 64-bit unsigned value from memory, in big-endian convention
758
* (most significant byte comes first). This function is defined only
759
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
760
*
761
* @param src the source address
762
* @return the decoded value
763
*/
764
static inline sph_u64 sph_dec64be(const void *src);
765
766
/**
767
* Decode a 64-bit unsigned value from memory, in big-endian convention
768
* (most significant byte comes first). This function assumes that the
769
* source address is suitably aligned for a direct access, if the platform
770
* supports such things; it can thus be marginally faster than the generic
771
* <code>sph_dec64be()</code> function. This function is defined only
772
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
773
*
774
* @param src the source address
775
* @return the decoded value
776
*/
777
static inline sph_u64 sph_dec64be_aligned(const void *src);
778
779
/**
780
* Encode a 64-bit unsigned value into memory, in big-endian convention
781
* (most significant byte comes first). This function is defined only
782
* if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
783
*
784
* @param dst the destination buffer
785
* @param val the value to encode
786
*/
787
static inline void sph_enc64be(void *dst, sph_u64 val);
788
789
/**
790
* Encode a 64-bit unsigned value into memory, in big-endian convention
791
* (most significant byte comes first). This function assumes that the
792
* destination address is suitably aligned for a direct access, if the
793
* platform supports such things; it can thus be marginally faster than
794
* the generic <code>sph_enc64be()</code> function. This function is defined
795
* only if a suitable 64-bit type was detected and used for
796
* <code>sph_u64</code>.
797
*
798
* @param dst the destination buffer
799
* @param val the value to encode
800
*/
801
static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
802
803
#endif
804
805
/* ============== END documentation block for Doxygen ============= */
806
807
#ifndef DOXYGEN_IGNORE
808
809
/*
 * We want to define the types "sph_u32" and "sph_u64" which hold
 * unsigned values of at least, respectively, 32 and 64 bits. These
 * tests should select appropriate types for most platforms. The
 * macro "SPH_64" is defined if a suitable 64-bit type is supported.
 */

#undef SPH_64
#undef SPH_64_TRUE

#if defined __STDC__ && __STDC_VERSION__ >= 199901L

/*
 * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
 * type, if any, or otherwise use a wider type (which must exist, for
 * C99 conformance).
 */

#include <stdint.h>

#ifdef UINT32_MAX
typedef uint32_t sph_u32;
typedef int32_t sph_s32;
#else
/* No exact 32-bit type: fall back on the fastest type of >= 32 bits. */
typedef uint_fast32_t sph_u32;
typedef int_fast32_t sph_s32;
#endif
#if !SPH_NO_64
#ifdef UINT64_MAX
typedef uint64_t sph_u64;
typedef int64_t sph_s64;
#else
typedef uint_fast64_t sph_u64;
typedef int_fast64_t sph_s64;
#endif
#endif

/* SPH_C32/SPH_C64 turn a literal into a constant of the right type. */
#define SPH_C32(x) ((sph_u32)(x))
#if !SPH_NO_64
#define SPH_C64(x) ((sph_u64)(x))
#define SPH_64 1
#endif

#else

/*
 * On non-C99 systems, we use "unsigned int" if it is wide enough,
 * "unsigned long" otherwise. This supports all "reasonable" architectures.
 * We have to be cautious: pre-C99 preprocessors handle constants
 * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
 */

#if ((UINT_MAX >> 11) >> 11) >= 0x3FF

typedef unsigned int sph_u32;
typedef int sph_s32;

#define SPH_C32(x) ((sph_u32)(x ## U))

#else

typedef unsigned long sph_u32;
typedef long sph_s32;

#define SPH_C32(x) ((sph_u32)(x ## UL))

#endif

#if !SPH_NO_64

/*
 * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
 * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
 * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
 * test whether "unsigned long long" is available; we also know that
 * gcc features this type, even if the libc header do not know it.
 */

#if ((ULONG_MAX >> 31) >> 31) >= 3

typedef unsigned long sph_u64;
typedef long sph_s64;

#define SPH_C64(x) ((sph_u64)(x ## UL))

#define SPH_64 1

#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__

typedef unsigned long long sph_u64;
typedef long long sph_s64;

#define SPH_C64(x) ((sph_u64)(x ## ULL))

#define SPH_64 1

#else

/*
 * No 64-bit type... SPH_64 stays undefined and the 64-bit helper
 * functions below are not compiled.
 */

#endif

#endif

#endif
916
917
/*
 * If the "unsigned long" type has length 64 bits or more, then this is
 * a "true" 64-bit architecture. This is also true with Visual C on
 * amd64, even though the "long" type is limited to 32 bits.
 */
#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
#define SPH_64_TRUE 1
#endif

/*
 * Implementation note: some processors have specific opcodes to perform
 * a rotation. Recent versions of gcc recognize the rotation expressions
 * below and use the relevant opcodes, when appropriate. SPH_T32/SPH_T64
 * truncate to 32/64 bits, which matters when the underlying type is
 * wider. NOTE(review): the rotation count n must be in 1..31 (resp.
 * 1..63) — a count of 0 would shift by the full type width, which is
 * undefined behavior; all call sites use non-zero constants.
 */

#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#ifdef _MSC_VER
/* MSVC: use the compiler rotation intrinsics. */
#define SPH_ROTL32(x, n) _rotl(x, n)
#define SPH_ROTR32(x, n) _rotr(x, n)
#else
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#endif

#if SPH_64

#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#ifdef _MSC_VER
#define SPH_ROTL64(x, n) _rotl64(x, n)
#define SPH_ROTR64(x, n) _rotr64(x, n)
#else
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
#endif

#endif
953
954
#ifndef DOXYGEN_IGNORE
/*
 * Define SPH_INLINE to be an "inline" qualifier, if available. We define
 * some small macro-like functions which benefit greatly from being inlined.
 * C99 and gcc provide "inline"; MSVC spells it "__inline"; otherwise the
 * qualifier is simply omitted.
 */
#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
#define SPH_INLINE inline
#elif defined _MSC_VER
#define SPH_INLINE __inline
#else
#define SPH_INLINE
#endif
#endif
967
968
/*
969
* We define some macros which qualify the architecture. These macros
970
* may be explicitly set externally (e.g. as compiler parameters). The
971
* code below sets those macros if they are not already defined.
972
*
973
* Most macros are boolean, thus evaluate to either zero or non-zero.
974
* The SPH_UPTR macro is special, in that it evaluates to a C type,
975
* or is not defined.
976
*
977
* SPH_UPTR if defined: unsigned type to cast pointers into
978
*
979
* SPH_UNALIGNED non-zero if unaligned accesses are efficient
980
* SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
981
* SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian
982
* SPH_LITTLE_FAST non-zero if little-endian decoding is fast
983
* SPH_BIG_FAST non-zero if big-endian decoding is fast
984
*
985
* If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
986
* values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
987
* _must_ be non-zero in those situations. The 32-bit and 64-bit types
988
* _must_ also have an exact width.
989
*
990
* SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
991
* SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
992
* SPH_SPARCV9_GCC UltraSPARC-compatible with gcc
993
* SPH_I386_GCC x86-compatible (32-bit) with gcc
994
* SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
995
* SPH_AMD64_GCC x86-compatible (64-bit) with gcc
996
* SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
997
* SPH_PPC32_GCC PowerPC, 32-bit, with gcc
998
* SPH_PPC64_GCC PowerPC, 64-bit, with gcc
999
*
1000
* TODO: enhance automatic detection, for more architectures and compilers.
1001
* Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
1002
* some very fast functions (e.g. MD4) when using unaligned input data.
1003
* The CPU-specific-with-GCC macros are useful only for inline assembly,
1004
* normally restrained to this header file.
1005
*/
1006
1007
/*
 * Architecture auto-detection: each branch sets SPH_DETECT_* macros,
 * which are later copied into the public SPH_* macros unless those
 * were already defined externally.
 */

/*
 * 32-bit x86, aka "i386 compatible".
 */
#if defined __i386__ || defined _M_IX86

#define SPH_DETECT_UNALIGNED 1
#define SPH_DETECT_LITTLE_ENDIAN 1
#define SPH_DETECT_UPTR sph_u32
#ifdef __GNUC__
#define SPH_DETECT_I386_GCC 1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_I386_MSVC 1
#endif

/*
 * 64-bit x86, hereafter known as "amd64".
 */
#elif defined __x86_64 || defined _M_X64

#define SPH_DETECT_UNALIGNED 1
#define SPH_DETECT_LITTLE_ENDIAN 1
#define SPH_DETECT_UPTR sph_u64
#ifdef __GNUC__
#define SPH_DETECT_AMD64_GCC 1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_AMD64_MSVC 1
#endif

/*
 * 64-bit Sparc architecture (implies v9).
 */
#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
	|| defined __sparcv9

#define SPH_DETECT_BIG_ENDIAN 1
#define SPH_DETECT_UPTR sph_u64
#ifdef __GNUC__
#define SPH_DETECT_SPARCV9_GCC_64 1
#define SPH_DETECT_LITTLE_FAST 1
#endif

/*
 * 32-bit Sparc.
 */
#elif (defined __sparc__ || defined __sparc) \
	&& !(defined __sparcv9 || defined __arch64__)

#define SPH_DETECT_BIG_ENDIAN 1
#define SPH_DETECT_UPTR sph_u32
#if defined __GNUC__ && defined __sparc_v9__
#define SPH_DETECT_SPARCV9_GCC_32 1
#define SPH_DETECT_LITTLE_FAST 1
#endif

/*
 * ARM, little-endian.
 */
#elif defined __arm__ && __ARMEL__

#define SPH_DETECT_LITTLE_ENDIAN 1

/*
 * MIPS, little-endian.
 */
#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__

#define SPH_DETECT_LITTLE_ENDIAN 1

/*
 * MIPS, big-endian.
 */
#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__

#define SPH_DETECT_BIG_ENDIAN 1

/*
 * PowerPC.
 */
#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
	|| defined _ARCH_PPC

/*
 * Note: we do not declare cross-endian access to be "fast": even if
 * using inline assembly, implementation should still assume that
 * keeping the decoded word in a temporary is faster than decoding
 * it again.
 */
#if defined __GNUC__
#if SPH_64_TRUE
#define SPH_DETECT_PPC64_GCC 1
#else
#define SPH_DETECT_PPC32_GCC 1
#endif
#endif

#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN 1
#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
#define SPH_DETECT_LITTLE_ENDIAN 1
#endif

/*
 * Itanium, 64-bit. Endianness is selectable on ia64, hence the test.
 */
#elif defined __ia64 || defined __ia64__ \
	|| defined __itanium__ || defined _M_IA64

#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN 1
#else
#define SPH_DETECT_LITTLE_ENDIAN 1
#endif
#if defined __LP64__ || defined _LP64
#define SPH_DETECT_UPTR sph_u64
#else
#define SPH_DETECT_UPTR sph_u32
#endif

#endif
1128
1129
/*
 * Copy each SPH_DETECT_* result into the corresponding public SPH_*
 * macro, unless that macro was already defined externally (e.g. as a
 * compiler parameter), which always takes precedence.
 */
#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
#define SPH_DETECT_SPARCV9_GCC 1
#endif

#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
#define SPH_UNALIGNED SPH_DETECT_UNALIGNED
#endif
#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
#define SPH_UPTR SPH_DETECT_UPTR
#endif
#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN
#endif
#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN
#endif
#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST
#endif
#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
#define SPH_BIG_FAST SPH_DETECT_BIG_FAST
#endif
#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32
#endif
#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64
#endif
#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC
#endif
#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
#define SPH_I386_GCC SPH_DETECT_I386_GCC
#endif
#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
#define SPH_I386_MSVC SPH_DETECT_I386_MSVC
#endif
#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC
#endif
#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC
#endif
#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC
#endif
#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC
#endif

/* On a platform of known endianness, same-endian access is fast. */
#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST 1
#endif
#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
#define SPH_BIG_FAST 1
#endif

/* "Smart" pointer-based access requires a known byte order. */
#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
#error SPH_UPTR defined, but endianness is not known.
#endif
1189
1190
#if SPH_I386_GCC && !SPH_NO_ASM
1191
1192
/*
1193
* On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1194
* values.
1195
*/
1196
1197
/*
 * Byte-swap a 32-bit value with the x86 "bswap" opcode. The value is
 * rewritten in place within its register; "__volatile__" keeps gcc
 * from moving or discarding the asm statement.
 */
static SPH_INLINE sph_u32
sph_bswap32(sph_u32 x)
{
	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
	return x;
}
1203
1204
#if SPH_64
1205
1206
static SPH_INLINE sph_u64
1207
sph_bswap64(sph_u64 x)
1208
{
1209
return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1210
| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1211
}
1212
1213
#endif
1214
1215
#elif SPH_AMD64_GCC && !SPH_NO_ASM
1216
1217
/*
1218
* On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1219
* and 64-bit values.
1220
*/
1221
1222
/*
 * Byte-swap a 32-bit value with the x86 "bswap" opcode (32-bit form
 * is still "bswapl" in 64-bit mode).
 */
static SPH_INLINE sph_u32
sph_bswap32(sph_u32 x)
{
	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
	return x;
}
1228
1229
#if SPH_64
1230
1231
/*
 * Byte-swap a 64-bit value with the amd64 "bswapq" opcode, in a
 * single instruction.
 */
static SPH_INLINE sph_u64
sph_bswap64(sph_u64 x)
{
	__asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
	return x;
}
1237
1238
#endif
1239
1240
/*
1241
* Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
1242
* to generate proper opcodes for endianness swapping with the pure C
1243
* implementation below.
1244
*
1245
1246
#elif SPH_I386_MSVC && !SPH_NO_ASM
1247
1248
static __inline sph_u32 __declspec(naked) __fastcall
1249
sph_bswap32(sph_u32 x)
1250
{
1251
__asm {
1252
bswap ecx
1253
mov eax,ecx
1254
ret
1255
}
1256
}
1257
1258
#if SPH_64
1259
1260
static SPH_INLINE sph_u64
1261
sph_bswap64(sph_u64 x)
1262
{
1263
return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1264
| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1265
}
1266
1267
#endif
1268
1269
*
1270
* [end of disabled code]
1271
*/
1272
1273
#else
1274
1275
static SPH_INLINE sph_u32
1276
sph_bswap32(sph_u32 x)
1277
{
1278
x = SPH_T32((x << 16) | (x >> 16));
1279
x = ((x & SPH_C32(0xFF00FF00)) >> 8)
1280
| ((x & SPH_C32(0x00FF00FF)) << 8);
1281
return x;
1282
}
1283
1284
#if SPH_64
1285
1286
/**
1287
* Byte-swap a 64-bit value.
1288
*
1289
* @param x the input value
1290
* @return the byte-swapped value
1291
*/
1292
static SPH_INLINE sph_u64
1293
sph_bswap64(sph_u64 x)
1294
{
1295
x = SPH_T64((x << 32) | (x >> 32));
1296
x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
1297
| ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
1298
x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
1299
| ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
1300
return x;
1301
}
1302
1303
#endif
1304
1305
#endif
1306
1307
#if SPH_SPARCV9_GCC && !SPH_NO_ASM
1308
1309
/*
1310
* On UltraSPARC systems, native ordering is big-endian, but it is
1311
* possible to perform little-endian read accesses by specifying the
1312
* address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
1313
* the opcode "lda [%reg]0x88,%dst", where %reg is the register which
1314
* contains the source address and %dst is the destination register,
1315
* or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
1316
* to get the address space name. The latter format is better since it
1317
* combines an addition and the actual access in a single opcode; but
1318
* it requires the setting (and subsequent resetting) of %asi, which is
1319
* slow. Some operations (i.e. MD5 compression function) combine many
1320
* successive little-endian read accesses, which may share the same
1321
* %asi setting. The macros below contain the appropriate inline
1322
* assembly.
1323
*/
1324
1325
#define SPH_SPARCV9_SET_ASI \
1326
sph_u32 sph_sparcv9_asi; \
1327
__asm__ __volatile__ ( \
1328
"rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
1329
1330
#define SPH_SPARCV9_RESET_ASI \
1331
__asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
1332
1333
#define SPH_SPARCV9_DEC32LE(base, idx) ({ \
1334
sph_u32 sph_sparcv9_tmp; \
1335
__asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
1336
: "=r" (sph_sparcv9_tmp) : "r" (base)); \
1337
sph_sparcv9_tmp; \
1338
})
1339
1340
#endif
1341
1342
/*
 * Encode a 16-bit value in big-endian order (most significant byte
 * first) into the destination buffer.
 */
static SPH_INLINE void
sph_enc16be(void *dst, unsigned val)
{
	unsigned char *buf = dst;

	buf[0] = (unsigned char)(val >> 8);
	buf[1] = (unsigned char)val;
}
1348
1349
/*
 * Decode a 16-bit value stored in big-endian order (most significant
 * byte first).
 */
static SPH_INLINE unsigned
sph_dec16be(const void *src)
{
	const unsigned char *buf = src;

	return ((unsigned)buf[0] << 8) | (unsigned)buf[1];
}
1355
1356
/*
 * Encode a 16-bit value in little-endian order (least significant
 * byte first) into the destination buffer.
 */
static SPH_INLINE void
sph_enc16le(void *dst, unsigned val)
{
	unsigned char *buf = dst;

	buf[0] = (unsigned char)val;
	buf[1] = (unsigned char)(val >> 8);
}
1362
1363
/*
 * Decode a 16-bit value stored in little-endian order (least
 * significant byte first).
 */
static SPH_INLINE unsigned
sph_dec16le(const void *src)
{
	const unsigned char *buf = src;

	return (unsigned)buf[0] | ((unsigned)buf[1] << 8);
}
1369
1370
/**
 * Encode a 32-bit value into the provided buffer (big endian convention).
 *
 * @param dst   the destination buffer
 * @param val   the 32-bit value to encode
 */
static SPH_INLINE void
sph_enc32be(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
	/* Unaligned word stores are cheap here: swap if needed, store. */
#if SPH_LITTLE_ENDIAN
	val = sph_bswap32(val);
#endif
	*(sph_u32 *)dst = val;
#else
	/* Word store when the pointer is 4-byte aligned, bytes otherwise. */
	if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_LITTLE_ENDIAN
		val = sph_bswap32(val);
#endif
		*(sph_u32 *)dst = val;
	} else {
		((unsigned char *)dst)[0] = (val >> 24);
		((unsigned char *)dst)[1] = (val >> 16);
		((unsigned char *)dst)[2] = (val >> 8);
		((unsigned char *)dst)[3] = val;
	}
#endif
#else
	/* No pointer-sized integer type known: portable byte stores. */
	((unsigned char *)dst)[0] = (val >> 24);
	((unsigned char *)dst)[1] = (val >> 16);
	((unsigned char *)dst)[2] = (val >> 8);
	((unsigned char *)dst)[3] = val;
#endif
}
1405
1406
/**
 * Encode a 32-bit value into the provided buffer (big endian convention).
 * The destination buffer must be properly aligned.
 *
 * @param dst   the destination buffer (32-bit aligned)
 * @param val   the value to encode
 */
static SPH_INLINE void
sph_enc32be_aligned(void *dst, sph_u32 val)
{
#if SPH_LITTLE_ENDIAN
	/* Native order is little-endian: swap, then store as one word. */
	*(sph_u32 *)dst = sph_bswap32(val);
#elif SPH_BIG_ENDIAN
	/* Native order already matches: store the word directly. */
	*(sph_u32 *)dst = val;
#else
	/* Unknown endianness: portable byte-by-byte store. */
	((unsigned char *)dst)[0] = (val >> 24);
	((unsigned char *)dst)[1] = (val >> 16);
	((unsigned char *)dst)[2] = (val >> 8);
	((unsigned char *)dst)[3] = val;
#endif
}
1427
1428
/**
 * Decode a 32-bit value from the provided buffer (big endian convention).
 *
 * @param src   the source buffer
 * @return  the decoded value
 */
static SPH_INLINE sph_u32
sph_dec32be(const void *src)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
	/* Unaligned word reads are cheap here: read, swap if needed. */
#if SPH_LITTLE_ENDIAN
	return sph_bswap32(*(const sph_u32 *)src);
#else
	return *(const sph_u32 *)src;
#endif
#else
	/* Word read when the pointer is 4-byte aligned, bytes otherwise. */
	if (((SPH_UPTR)src & 3) == 0) {
#if SPH_LITTLE_ENDIAN
		return sph_bswap32(*(const sph_u32 *)src);
#else
		return *(const sph_u32 *)src;
#endif
	} else {
		return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
			| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
			| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
			| (sph_u32)(((const unsigned char *)src)[3]);
	}
#endif
#else
	/* No pointer-sized integer type known: portable byte reads. */
	return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
		| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
		| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
		| (sph_u32)(((const unsigned char *)src)[3]);
#endif
}
1465
1466
/**
 * Decode a 32-bit value from the provided buffer (big endian convention).
 * The source buffer must be properly aligned.
 *
 * @param src   the source buffer (32-bit aligned)
 * @return  the decoded value
 */
static SPH_INLINE sph_u32
sph_dec32be_aligned(const void *src)
{
#if SPH_LITTLE_ENDIAN
	/* Native order is little-endian: read as one word, then swap. */
	return sph_bswap32(*(const sph_u32 *)src);
#elif SPH_BIG_ENDIAN
	/* Native order already matches: read the word directly. */
	return *(const sph_u32 *)src;
#else
	/* Unknown endianness: portable byte-by-byte read. */
	return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
		| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
		| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
		| (sph_u32)(((const unsigned char *)src)[3]);
#endif
}
1487
1488
/**
 * Encode a 32-bit value into the provided buffer (little endian convention).
 *
 * @param dst   the destination buffer
 * @param val   the 32-bit value to encode
 */
static SPH_INLINE void
sph_enc32le(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
	/* Unaligned word stores are cheap here: swap if needed, store. */
#if SPH_BIG_ENDIAN
	val = sph_bswap32(val);
#endif
	*(sph_u32 *)dst = val;
#else
	/* Word store when the pointer is 4-byte aligned, bytes otherwise. */
	if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_BIG_ENDIAN
		val = sph_bswap32(val);
#endif
		*(sph_u32 *)dst = val;
	} else {
		((unsigned char *)dst)[0] = val;
		((unsigned char *)dst)[1] = (val >> 8);
		((unsigned char *)dst)[2] = (val >> 16);
		((unsigned char *)dst)[3] = (val >> 24);
	}
#endif
#else
	/* No pointer-sized integer type known: portable byte stores. */
	((unsigned char *)dst)[0] = val;
	((unsigned char *)dst)[1] = (val >> 8);
	((unsigned char *)dst)[2] = (val >> 16);
	((unsigned char *)dst)[3] = (val >> 24);
#endif
}
1523
1524
/**
 * Encode a 32-bit value into the provided buffer (little endian convention).
 * The destination buffer must be properly aligned.
 *
 * @param dst   the destination buffer (32-bit aligned)
 * @param val   the value to encode
 */
static SPH_INLINE void
sph_enc32le_aligned(void *dst, sph_u32 val)
{
#if SPH_LITTLE_ENDIAN
	/* Native order already matches: store the word directly. */
	*(sph_u32 *)dst = val;
#elif SPH_BIG_ENDIAN
	/* Native order is big-endian: swap, then store as one word. */
	*(sph_u32 *)dst = sph_bswap32(val);
#else
	/* Unknown endianness: portable byte-by-byte store. */
	((unsigned char *)dst)[0] = val;
	((unsigned char *)dst)[1] = (val >> 8);
	((unsigned char *)dst)[2] = (val >> 16);
	((unsigned char *)dst)[3] = (val >> 24);
#endif
}
1545
1546
/**
 * Decode a 32-bit value from the provided buffer (little endian convention).
 *
 * @param src   the source buffer
 * @return  the decoded value
 */
static SPH_INLINE sph_u32
sph_dec32le(const void *src)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
	/* Unaligned word reads are cheap here: read, swap if needed. */
#if SPH_BIG_ENDIAN
	return sph_bswap32(*(const sph_u32 *)src);
#else
	return *(const sph_u32 *)src;
#endif
#else
	/* Word read when the pointer is 4-byte aligned, bytes otherwise. */
	if (((SPH_UPTR)src & 3) == 0) {
#if SPH_BIG_ENDIAN
#if SPH_SPARCV9_GCC && !SPH_NO_ASM
		sph_u32 tmp;

		/*
		 * "__volatile__" is needed here because without it,
		 * gcc-3.4.3 miscompiles the code and performs the
		 * access before the test on the address, thus triggering
		 * a bus error...
		 */
		__asm__ __volatile__ (
			"lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
		return tmp;
/*
 * On PowerPC, this turns out not to be worth the effort: the inline
 * assembly makes GCC optimizer uncomfortable, which tends to nullify
 * the decoding gains.
 *
 * For most hash functions, using this inline assembly trick changes
 * hashing speed by less than 5% and often _reduces_ it. The biggest
 * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
 * less than 10%. The speed gain on CubeHash is probably due to the
 * chronic shortage of registers that CubeHash endures; for the other
 * functions, the generic code appears to be efficient enough already.
 *
#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
		sph_u32 tmp;

		__asm__ __volatile__ (
			"lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
		return tmp;
 */
#else
		return sph_bswap32(*(const sph_u32 *)src);
#endif
#else
		return *(const sph_u32 *)src;
#endif
	} else {
		return (sph_u32)(((const unsigned char *)src)[0])
			| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
			| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
			| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
	}
#endif
#else
	/* No pointer-sized integer type known: portable byte reads. */
	return (sph_u32)(((const unsigned char *)src)[0])
		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
#endif
}
1616
1617
/**
 * Decode a 32-bit value from the provided buffer (little endian convention).
 * The source buffer must be properly aligned.
 *
 * @param src   the source buffer (32-bit aligned)
 * @return  the decoded value
 */
static SPH_INLINE sph_u32
sph_dec32le_aligned(const void *src)
{
#if SPH_LITTLE_ENDIAN
	/* Native order already matches: read the word directly. */
	return *(const sph_u32 *)src;
#elif SPH_BIG_ENDIAN
#if SPH_SPARCV9_GCC && !SPH_NO_ASM
	/* UltraSPARC: little-endian load via ASI_PRIMARY_LITTLE (0x88). */
	sph_u32 tmp;

	__asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
	return tmp;
/*
 * Not worth it generally.
 *
#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
	sph_u32 tmp;

	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
	return tmp;
 */
#else
	/* Big-endian host: read as one word, then swap. */
	return sph_bswap32(*(const sph_u32 *)src);
#endif
#else
	/* Unknown endianness: portable byte-by-byte read. */
	return (sph_u32)(((const unsigned char *)src)[0])
		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
#endif
}
1654
1655
#if SPH_64
1656
1657
/**
 * Encode a 64-bit value into the provided buffer (big endian convention).
 *
 * @param dst   the destination buffer
 * @param val   the 64-bit value to encode
 */
static SPH_INLINE void
sph_enc64be(void *dst, sph_u64 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
	/* Unaligned word stores are cheap here: swap if needed, store. */
#if SPH_LITTLE_ENDIAN
	val = sph_bswap64(val);
#endif
	*(sph_u64 *)dst = val;
#else
	/* Word store when the pointer is 8-byte aligned, bytes otherwise. */
	if (((SPH_UPTR)dst & 7) == 0) {
#if SPH_LITTLE_ENDIAN
		val = sph_bswap64(val);
#endif
		*(sph_u64 *)dst = val;
	} else {
		((unsigned char *)dst)[0] = (val >> 56);
		((unsigned char *)dst)[1] = (val >> 48);
		((unsigned char *)dst)[2] = (val >> 40);
		((unsigned char *)dst)[3] = (val >> 32);
		((unsigned char *)dst)[4] = (val >> 24);
		((unsigned char *)dst)[5] = (val >> 16);
		((unsigned char *)dst)[6] = (val >> 8);
		((unsigned char *)dst)[7] = val;
	}
#endif
#else
	/* No pointer-sized integer type known: portable byte stores. */
	((unsigned char *)dst)[0] = (val >> 56);
	((unsigned char *)dst)[1] = (val >> 48);
	((unsigned char *)dst)[2] = (val >> 40);
	((unsigned char *)dst)[3] = (val >> 32);
	((unsigned char *)dst)[4] = (val >> 24);
	((unsigned char *)dst)[5] = (val >> 16);
	((unsigned char *)dst)[6] = (val >> 8);
	((unsigned char *)dst)[7] = val;
#endif
}
1700
1701
/**
1702
* Encode a 64-bit value into the provided buffer (big endian convention).
1703
* The destination buffer must be properly aligned.
1704
*
1705
* @param dst the destination buffer (64-bit aligned)
1706
* @param val the value to encode
1707
*/
1708
static SPH_INLINE void
1709
sph_enc64be_aligned(void *dst, sph_u64 val)
1710
{
1711
#if SPH_LITTLE_ENDIAN
1712
*(sph_u64 *)dst = sph_bswap64(val);
1713
#elif SPH_BIG_ENDIAN
1714
*(sph_u64 *)dst = val;
1715
#else
1716
((unsigned char *)dst)[0] = (val >> 56);
1717
((unsigned char *)dst)[1] = (val >> 48);
1718
((unsigned char *)dst)[2] = (val >> 40);
1719
((unsigned char *)dst)[3] = (val >> 32);
1720
((unsigned char *)dst)[4] = (val >> 24);
1721
((unsigned char *)dst)[5] = (val >> 16);
1722
((unsigned char *)dst)[6] = (val >> 8);
1723
((unsigned char *)dst)[7] = val;
1724
#endif
1725
}
1726
1727
/**
1728
* Decode a 64-bit value from the provided buffer (big endian convention).
1729
*
1730
* @param src the source buffer
1731
* @return the decoded value
1732
*/
1733
static SPH_INLINE sph_u64
1734
sph_dec64be(const void *src)
1735
{
1736
#if defined SPH_UPTR
1737
#if SPH_UNALIGNED
1738
#if SPH_LITTLE_ENDIAN
1739
return sph_bswap64(*(const sph_u64 *)src);
1740
#else
1741
return *(const sph_u64 *)src;
1742
#endif
1743
#else
1744
if (((SPH_UPTR)src & 7) == 0) {
1745
#if SPH_LITTLE_ENDIAN
1746
return sph_bswap64(*(const sph_u64 *)src);
1747
#else
1748
return *(const sph_u64 *)src;
1749
#endif
1750
} else {
1751
return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1752
| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1753
| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1754
| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1755
| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1756
| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1757
| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1758
| (sph_u64)(((const unsigned char *)src)[7]);
1759
}
1760
#endif
1761
#else
1762
return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1763
| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1764
| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1765
| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1766
| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1767
| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1768
| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1769
| (sph_u64)(((const unsigned char *)src)[7]);
1770
#endif
1771
}
1772
1773
/**
1774
* Decode a 64-bit value from the provided buffer (big endian convention).
1775
* The source buffer must be properly aligned.
1776
*
1777
* @param src the source buffer (64-bit aligned)
1778
* @return the decoded value
1779
*/
1780
static SPH_INLINE sph_u64
1781
sph_dec64be_aligned(const void *src)
1782
{
1783
#if SPH_LITTLE_ENDIAN
1784
return sph_bswap64(*(const sph_u64 *)src);
1785
#elif SPH_BIG_ENDIAN
1786
return *(const sph_u64 *)src;
1787
#else
1788
return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1789
| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1790
| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1791
| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1792
| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1793
| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1794
| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1795
| (sph_u64)(((const unsigned char *)src)[7]);
1796
#endif
1797
}
1798
1799
/**
1800
* Encode a 64-bit value into the provided buffer (little endian convention).
1801
*
1802
* @param dst the destination buffer
1803
* @param val the 64-bit value to encode
1804
*/
1805
static SPH_INLINE void
1806
sph_enc64le(void *dst, sph_u64 val)
1807
{
1808
#if defined SPH_UPTR
1809
#if SPH_UNALIGNED
1810
#if SPH_BIG_ENDIAN
1811
val = sph_bswap64(val);
1812
#endif
1813
*(sph_u64 *)dst = val;
1814
#else
1815
if (((SPH_UPTR)dst & 7) == 0) {
1816
#if SPH_BIG_ENDIAN
1817
val = sph_bswap64(val);
1818
#endif
1819
*(sph_u64 *)dst = val;
1820
} else {
1821
((unsigned char *)dst)[0] = val;
1822
((unsigned char *)dst)[1] = (val >> 8);
1823
((unsigned char *)dst)[2] = (val >> 16);
1824
((unsigned char *)dst)[3] = (val >> 24);
1825
((unsigned char *)dst)[4] = (val >> 32);
1826
((unsigned char *)dst)[5] = (val >> 40);
1827
((unsigned char *)dst)[6] = (val >> 48);
1828
((unsigned char *)dst)[7] = (val >> 56);
1829
}
1830
#endif
1831
#else
1832
((unsigned char *)dst)[0] = val;
1833
((unsigned char *)dst)[1] = (val >> 8);
1834
((unsigned char *)dst)[2] = (val >> 16);
1835
((unsigned char *)dst)[3] = (val >> 24);
1836
((unsigned char *)dst)[4] = (val >> 32);
1837
((unsigned char *)dst)[5] = (val >> 40);
1838
((unsigned char *)dst)[6] = (val >> 48);
1839
((unsigned char *)dst)[7] = (val >> 56);
1840
#endif
1841
}
1842
1843
/**
1844
* Encode a 64-bit value into the provided buffer (little endian convention).
1845
* The destination buffer must be properly aligned.
1846
*
1847
* @param dst the destination buffer (64-bit aligned)
1848
* @param val the value to encode
1849
*/
1850
static SPH_INLINE void
1851
sph_enc64le_aligned(void *dst, sph_u64 val)
1852
{
1853
#if SPH_LITTLE_ENDIAN
1854
*(sph_u64 *)dst = val;
1855
#elif SPH_BIG_ENDIAN
1856
*(sph_u64 *)dst = sph_bswap64(val);
1857
#else
1858
((unsigned char *)dst)[0] = val;
1859
((unsigned char *)dst)[1] = (val >> 8);
1860
((unsigned char *)dst)[2] = (val >> 16);
1861
((unsigned char *)dst)[3] = (val >> 24);
1862
((unsigned char *)dst)[4] = (val >> 32);
1863
((unsigned char *)dst)[5] = (val >> 40);
1864
((unsigned char *)dst)[6] = (val >> 48);
1865
((unsigned char *)dst)[7] = (val >> 56);
1866
#endif
1867
}
1868
1869
/**
1870
* Decode a 64-bit value from the provided buffer (little endian convention).
1871
*
1872
* @param src the source buffer
1873
* @return the decoded value
1874
*/
1875
static SPH_INLINE sph_u64
1876
sph_dec64le(const void *src)
1877
{
1878
#if defined SPH_UPTR
1879
#if SPH_UNALIGNED
1880
#if SPH_BIG_ENDIAN
1881
return sph_bswap64(*(const sph_u64 *)src);
1882
#else
1883
return *(const sph_u64 *)src;
1884
#endif
1885
#else
1886
if (((SPH_UPTR)src & 7) == 0) {
1887
#if SPH_BIG_ENDIAN
1888
#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1889
sph_u64 tmp;
1890
1891
__asm__ __volatile__ (
1892
"ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1893
return tmp;
1894
/*
1895
* Not worth it generally.
1896
*
1897
#elif SPH_PPC32_GCC && !SPH_NO_ASM
1898
return (sph_u64)sph_dec32le_aligned(src)
1899
| ((sph_u64)sph_dec32le_aligned(
1900
(const char *)src + 4) << 32);
1901
#elif SPH_PPC64_GCC && !SPH_NO_ASM
1902
sph_u64 tmp;
1903
1904
__asm__ __volatile__ (
1905
"ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1906
return tmp;
1907
*/
1908
#else
1909
return sph_bswap64(*(const sph_u64 *)src);
1910
#endif
1911
#else
1912
return *(const sph_u64 *)src;
1913
#endif
1914
} else {
1915
return (sph_u64)(((const unsigned char *)src)[0])
1916
| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1917
| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1918
| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1919
| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1920
| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1921
| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1922
| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1923
}
1924
#endif
1925
#else
1926
return (sph_u64)(((const unsigned char *)src)[0])
1927
| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1928
| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1929
| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1930
| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1931
| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1932
| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1933
| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1934
#endif
1935
}
1936
1937
/**
1938
* Decode a 64-bit value from the provided buffer (little endian convention).
1939
* The source buffer must be properly aligned.
1940
*
1941
* @param src the source buffer (64-bit aligned)
1942
* @return the decoded value
1943
*/
1944
static SPH_INLINE sph_u64
1945
sph_dec64le_aligned(const void *src)
1946
{
1947
#if SPH_LITTLE_ENDIAN
1948
return *(const sph_u64 *)src;
1949
#elif SPH_BIG_ENDIAN
1950
#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1951
sph_u64 tmp;
1952
1953
__asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1954
return tmp;
1955
/*
1956
* Not worth it generally.
1957
*
1958
#elif SPH_PPC32_GCC && !SPH_NO_ASM
1959
return (sph_u64)sph_dec32le_aligned(src)
1960
| ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
1961
#elif SPH_PPC64_GCC && !SPH_NO_ASM
1962
sph_u64 tmp;
1963
1964
__asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1965
return tmp;
1966
*/
1967
#else
1968
return sph_bswap64(*(const sph_u64 *)src);
1969
#endif
1970
#else
1971
return (sph_u64)(((const unsigned char *)src)[0])
1972
| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1973
| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1974
| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1975
| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1976
| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1977
| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1978
| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1979
#endif
1980
}
1981
1982
#endif

#endif /* Doxygen excluded block */

#endif