GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmliblzma/common/tuklib_integer.h
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: byteswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads: readXXYe(ptr)
///   - Unaligned writes: writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find the highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
///////////////////////////////////////////////////////////////////////////////
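
// A minimal usage sketch of the operations listed above, kept inside a
// comment like the compiler-comparison example later in this file. The
// function name and buffer contents here are hypothetical, not part of
// the API:
/*
#include "tuklib_integer.h"

static uint32_t
example(uint8_t *buf8)  // buf8 points to at least 4 readable/writable bytes
{
	// Unaligned fixed endian read: same result on every host.
	uint32_t v = read32le(buf8);

	// Byte order conversion of a plain integer; on a little endian host
	// conv32be() byteswaps while conv32le() is just a cast.
	uint32_t big = conv32be(v);

	// Unaligned big endian write.
	write32be(buf8, v);

	return big;
}
*/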

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
// Only include <intrin.h> when it is needed. GCC and Clang can both
// use __builtin's, so we only need the Windows intrinsics when using MSVC.
// GCC and Clang can set _MSC_VER on Windows, so we need to exclude these
// cases explicitly.
#elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__)
#	include <intrin.h>
#endif


///////////////////
// Byte swapping //
///////////////////

#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define byteswap16(num) __builtin_bswap16(num)
#	define byteswap32(num) __builtin_bswap32(num)
#	define byteswap64(num) __builtin_bswap64(num)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define byteswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define byteswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define byteswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>
#	ifdef __OpenBSD__
#		define byteswap16(num) swap16(num)
#		define byteswap32(num) swap32(num)
#		define byteswap64(num) swap64(num)
#	else
#		define byteswap16(num) bswap16(num)
#		define byteswap32(num) bswap32(num)
#		define byteswap64(num) bswap64(num)
#	endif

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define byteswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define byteswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define byteswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

#ifndef byteswap16
#	define byteswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef byteswap32
#	define byteswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef byteswap64
#	define byteswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif
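
// For example, the generic fallback above computes
// byteswap32(UINT32_C(0x11223344)) == UINT32_C(0x44332211): each byte is
// masked out and shifted to the mirrored position before the results are
// ORed together.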

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) byteswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) byteswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) byteswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) byteswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) byteswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) byteswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
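
// For example, on a little endian host conv32be(UINT32_C(0x11223344))
// expands to byteswap32() and yields 0x44332211, while conv32le() is just
// a cast; on a big endian host the roles are swapped.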


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// No-strict-align archs like x86-64
// ---------------------------------
//
// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard-compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.
//
// Some compilers on x86-64 like Clang >= 10 and GCC >= 5.1 detect that
//
//     buf[0] | (buf[1] << 8)
//
// reads a 16-bit value and can emit a single 16-bit load, producing code
// identical to the memcpy() method. In other cases Clang and GCC produce
// either the same or better code with memcpy(). For example, Clang 9 on
// x86-64 can detect the 32-bit load but not the 16-bit load.
//
// MSVC uses unaligned access with the memcpy() method but emits byte-by-byte
// code for "buf[0] | (buf[1] << 8)".
//
// Conclusion: The memcpy() method is the best choice when unaligned access
// is supported.
//
// Strict-align archs like SPARC
// -----------------------------
//
// GCC versions from around 4.x to at least 13.2.0 produce worse code
// from the memcpy() method than from simple byte-by-byte shift-or code
// when reading a 32-bit integer:
//
//     (1) It may be constructed on the stack using four 8-bit loads,
//         four 8-bit stores to the stack, and finally one 32-bit load
//         from the stack.
//
//     (2) Especially with -Os, an actual memcpy() call may be emitted.
//
// This is true at least on ARM, ARM64, SPARC, SPARC64, MIPS64EL, and
// RISC-V. Of these, ARM, ARM64, and RISC-V support unaligned access in
// some processors but not all, so this is relevant only in the case when
// GCC assumes that unaligned access is not supported or -mstrict-align or
// -mno-unaligned-access is used.
//
// For Clang it makes little difference. ARM64 with -O2 -mstrict-align
// was one of the very few with a minor difference: the memcpy() version
// was one instruction longer.
//
// Conclusion: At least in the case of GCC and Clang, byte-by-byte code is
// the best choice for strict-align archs to do unaligned access.
//
// See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111502
//
// Thanks to <https://godbolt.org/> it was easy to test different compilers.
// The following is for little endian targets:
/*
#include <stdint.h>
#include <string.h>

uint32_t bytes16(const uint8_t *b)
{
	return (uint32_t)b[0]
		| ((uint32_t)b[1] << 8);
}

uint32_t copy16(const uint8_t *b)
{
	uint16_t v;
	memcpy(&v, b, sizeof(v));
	return v;
}

uint32_t bytes32(const uint8_t *b)
{
	return (uint32_t)b[0]
		| ((uint32_t)b[1] << 8)
		| ((uint32_t)b[2] << 16)
		| ((uint32_t)b[3] << 24);
}

uint32_t copy32(const uint8_t *b)
{
	uint32_t v;
	memcpy(&v, b, sizeof(v));
	return v;
}

void wbytes16(uint8_t *b, uint16_t v)
{
	b[0] = (uint8_t)v;
	b[1] = (uint8_t)(v >> 8);
}

void wcopy16(uint8_t *b, uint16_t v)
{
	memcpy(b, &v, sizeof(v));
}

void wbytes32(uint8_t *b, uint32_t v)
{
	b[0] = (uint8_t)v;
	b[1] = (uint8_t)(v >> 8);
	b[2] = (uint8_t)(v >> 16);
	b[3] = (uint8_t)(v >> 24);
}

void wcopy32(uint8_t *b, uint32_t v)
{
	memcpy(b, &v, sizeof(v));
}
*/


#ifdef TUKLIB_FAST_UNALIGNED_ACCESS

static inline uint16_t
read16ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
read32ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
read64ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = read64ne(buf);
	return conv64le(num);
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#define write16be(buf, num) write16ne(buf, conv16be(num))
#define write32be(buf, num) write32ne(buf, conv32be(num))
#define write64be(buf, num) write64ne(buf, conv64be(num))
#define write16le(buf, num) write16ne(buf, conv16le(num))
#define write32le(buf, num) write32ne(buf, conv32le(num))
#define write64le(buf, num) write64ne(buf, conv64le(num))
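
// For example, in write32be(buf, UINT32_C(0x11223344)) the conv32be()
// argument is a compile-time constant, so on a little endian host the byte
// swap can be folded to the constant 0x44332211 before write32ne() performs
// the type-checked store.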

#else

#ifdef WORDS_BIGENDIAN
#	define read16ne read16be
#	define read32ne read32be
#	define read64ne read64be
#	define write16ne write16be
#	define write32ne write32be
#	define write64ne write64be
#else
#	define read16ne read16le
#	define read32ne read32le
#	define read64ne read64le
#	define write16ne write16le
#	define write32ne write32le
#	define write64ne write64le
#endif


static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
}


static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
}


static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
}


static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
}


static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
}


static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
}


static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}


static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}


static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}


static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}


static inline void
write64be(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)(num >> 56);
	buf[1] = (uint8_t)(num >> 48);
	buf[2] = (uint8_t)(num >> 40);
	buf[3] = (uint8_t)(num >> 32);
	buf[4] = (uint8_t)(num >> 24);
	buf[5] = (uint8_t)(num >> 16);
	buf[6] = (uint8_t)(num >> 8);
	buf[7] = (uint8_t)num;
	return;
}


static inline void
write64le(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	buf[4] = (uint8_t)(num >> 32);
	buf[5] = (uint8_t)(num >> 40);
	buf[6] = (uint8_t)(num >> 48);
	buf[7] = (uint8_t)(num >> 56);
	return;
}

#endif


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned, which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned, but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and Clang >= 3.6.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif
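
// For example, tuklib_memcpy_aligned(&num, buf, sizeof(num)) with a
// uint32_t num promises the compiler that buf is 4-byte aligned, so on a
// strict-align target it can emit a single aligned 32-bit load instead of
// the byte-by-byte sequence described earlier.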


static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}


// These need to be macros like in the unaligned case.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))


////////////////////
// Bit operations //
////////////////////

static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return (uint32_t)__builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#else
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}
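
// A worked example of the generic bsr32() fallback: for n == 0x00000470
// the highest set bit is bit 10. The fallback shifts by 16 (i = 15), skips
// the 8-step, shifts by 4 (i = 11), skips the 2-step, and finally
// decrements i once because bit 31 is still clear, returning 10.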


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	return (uint32_t)__builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#else
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}
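
// Note the identity behind the x86 paths above: for nonzero n,
// clz32(n) == bsr32(n) ^ 31U, since XOR with 31 mirrors a 5-bit index.
// For example, n == 0x00010000 gives bsr32(n) == 16 and clz32(n) == 15.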


static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
	return (uint32_t)__builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#else
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}

#define bsf32 ctz32
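
// A worked example of the generic ctz32() fallback: for n == 0x00000450
// the lowest set bit is bit 4. The low 16 and low 8 bits are non-zero, so
// only the 4-step fires (n >>= 4, i = 4); the shifted value 0x45 is odd,
// so no further increments happen and the result is 4.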

#endif