/* crc32.c -- compute the CRC-32 of a data stream1* Copyright (C) 1995-2022 Mark Adler2* For conditions of distribution and use, see copyright notice in zlib.h3*4* This interleaved implementation of a CRC makes use of pipelined multiple5* arithmetic-logic units, commonly found in modern CPU cores. It is due to6* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.7*/89/* @(#) $Id$ */1011/*12Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore13protection on the static variables used to control the first-use generation14of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should15first call get_crc_table() to initialize the tables before allowing more than16one thread to use crc32().1718MAKECRCH can be #defined to write out crc32.h. A main() routine is also19produced, so that this one source file can be compiled to an executable.20*/2122#ifdef MAKECRCH23# include <stdio.h>24# ifndef DYNAMIC_CRC_TABLE25# define DYNAMIC_CRC_TABLE26# endif /* !DYNAMIC_CRC_TABLE */27#endif /* MAKECRCH */2829#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */3031/*32A CRC of a message is computed on N braids of words in the message, where33each word consists of W bytes (4 or 8). If N is 3, for example, then three34running sparse CRCs are calculated respectively on each braid, at these35indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...36This is done starting at a word boundary, and continues until as many blocks37of N * W bytes as are available have been processed. The results are combined38into a single CRC at the end. For this code, N must be in the range 1..6 and39W must be 4 or 8. The upper limit on N can be increased if desired by adding40more #if blocks, extending the patterns apparent in the code. In addition,41crc32.h would need to be regenerated, if the maximum N value is increased.4243N and W are chosen empirically by benchmarking the execution time on a given44processor. The choices for N and W below were based on testing on Intel Kaby45Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS6446Octeon II processors. The Intel, AMD, and ARM processors were all fastest47with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.48They were all tested with either gcc or clang, all using the -O3 optimization49level. Your mileage may vary.50*/5152/* Define N */53#ifdef Z_TESTN54# define N Z_TESTN55#else56# define N 557#endif58#if N < 1 || N > 659# error N must be in 1..660#endif6162/*63z_crc_t must be at least 32 bits. z_word_t must be at least as long as64z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and65that bytes are eight bits.66*/6768/*69Define W and the associated z_word_t type. If W is not defined, then a70braided calculation is not used, and the associated tables and code are not71compiled.72*/73#ifdef Z_TESTW74# if Z_TESTW-1 != -175# define W Z_TESTW76# endif77#else78# ifdef MAKECRCH79# define W 8 /* required for MAKECRCH */80# else81# if defined(__x86_64__) || defined(__aarch64__)82# define W 883# else84# define W 485# endif86# endif87#endif88#ifdef W89# if W == 8 && defined(Z_U8)90typedef Z_U8 z_word_t;91# elif defined(Z_U4)92# undef W93# define W 494typedef Z_U4 z_word_t;95# else96# undef W97# endif98#endif99100/* If available, use the ARM processor CRC32 instruction. */101#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8102# define ARMCRC32103#endif104105#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))106/*107Swap the bytes in a z_word_t to convert between little and big endian. Any108self-respecting compiler will optimize this to a single machine byte-swap109instruction, if one is available. This assumes that word_t is either 32 bits110or 64 bits.111*/112local z_word_t byte_swap(z_word_t word) {113# if W == 8114return115(word & 0xff00000000000000) >> 56 |116(word & 0xff000000000000) >> 40 |117(word & 0xff0000000000) >> 24 |118(word & 0xff00000000) >> 8 |119(word & 0xff000000) << 8 |120(word & 0xff0000) << 24 |121(word & 0xff00) << 40 |122(word & 0xff) << 56;123# else /* W == 4 */124return125(word & 0xff000000) >> 24 |126(word & 0xff0000) >> 8 |127(word & 0xff00) << 8 |128(word & 0xff) << 24;129# endif130}131#endif132133#ifdef DYNAMIC_CRC_TABLE134/* =========================================================================135* Table of powers of x for combining CRC-32s, filled in by make_crc_table()136* below.137*/138local z_crc_t FAR x2n_table[32];139#else140/* =========================================================================141* Tables for byte-wise and braided CRC-32 calculations, and a table of powers142* of x for combining CRC-32s, all made by make_crc_table().143*/144# include "crc32.h"145#endif146147/* CRC polynomial. */148#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */149150/*151Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,152reflected. For speed, this requires that a not be zero.153*/154local z_crc_t multmodp(z_crc_t a, z_crc_t b) {155z_crc_t m, p;156157m = (z_crc_t)1 << 31;158p = 0;159for (;;) {160if (a & m) {161p ^= b;162if ((a & (m - 1)) == 0)163break;164}165m >>= 1;166b = b & 1 ? (b >> 1) ^ POLY : b >> 1;167}168return p;169}170171/*172Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been173initialized.174*/175local z_crc_t x2nmodp(z_off64_t n, unsigned k) {176z_crc_t p;177178p = (z_crc_t)1 << 31; /* x^0 == 1 */179while (n) {180if (n & 1)181p = multmodp(x2n_table[k & 31], p);182n >>= 1;183k++;184}185return p;186}187188#ifdef DYNAMIC_CRC_TABLE189/* =========================================================================190* Build the tables for byte-wise and braided CRC-32 calculations, and a table191* of powers of x for combining CRC-32s.192*/193local z_crc_t FAR crc_table[256];194#ifdef W195local z_word_t FAR crc_big_table[256];196local z_crc_t FAR crc_braid_table[W][256];197local z_word_t FAR crc_braid_big_table[W][256];198local void braid(z_crc_t [][256], z_word_t [][256], int, int);199#endif200#ifdef MAKECRCH201local void write_table(FILE *, const z_crc_t FAR *, int);202local void write_table32hi(FILE *, const z_word_t FAR *, int);203local void write_table64(FILE *, const z_word_t FAR *, int);204#endif /* MAKECRCH */205206/*207Define a once() function depending on the availability of atomics. If this is208compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in209multiple threads, and if atomics are not available, then get_crc_table() must210be called to initialize the tables and must return before any threads are211allowed to compute or combine CRCs.212*/213214/* Definition of once functionality. */215typedef struct once_s once_t;216217/* Check for the availability of atomics. */218#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \219!defined(__STDC_NO_ATOMICS__)220221#include <stdatomic.h>222223/* Structure for once(), which must be initialized with ONCE_INIT. */224struct once_s {225atomic_flag begun;226atomic_int done;227};228#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}229230/*231Run the provided init() function exactly once, even if multiple threads232invoke once() at the same time. The state must be a once_t initialized with233ONCE_INIT.234*/235local void once(once_t *state, void (*init)(void)) {236if (!atomic_load(&state->done)) {237if (atomic_flag_test_and_set(&state->begun))238while (!atomic_load(&state->done))239;240else {241init();242atomic_store(&state->done, 1);243}244}245}246247#else /* no atomics */248249/* Structure for once(), which must be initialized with ONCE_INIT. */250struct once_s {251volatile int begun;252volatile int done;253};254#define ONCE_INIT {0, 0}255256/* Test and set. Alas, not atomic, but tries to minimize the period of257vulnerability. */258local int test_and_set(int volatile *flag) {259int was;260261was = *flag;262*flag = 1;263return was;264}265266/* Run the provided init() function once. This is not thread-safe. */267local void once(once_t *state, void (*init)(void)) {268if (!state->done) {269if (test_and_set(&state->begun))270while (!state->done)271;272else {273init();274state->done = 1;275}276}277}278279#endif280281/* State for once(). */282local once_t made = ONCE_INIT;283284/*285Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:286x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.287288Polynomials over GF(2) are represented in binary, one bit per coefficient,289with the lowest powers in the most significant bit. Then adding polynomials290is just exclusive-or, and multiplying a polynomial by x is a right shift by291one. If we call the above polynomial p, and represent a byte as the292polynomial q, also with the lowest power in the most significant bit (so the293byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,294where a mod b means the remainder after dividing a by b.295296This calculation is done using the shift-register method of multiplying and297taking the remainder. The register is initialized to zero, and for each298incoming bit, x^32 is added mod p to the register if the bit is a one (where299x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x300(which is shifting right by one and adding x^32 mod p if the bit shifted out301is a one). We start with the highest power (least significant bit) of q and302repeat for all eight bits of q.303304The table is simply the CRC of all possible eight bit values. This is all the305information needed to generate CRCs on data a byte at a time for all306combinations of CRC register values and incoming bytes.307*/308309local void make_crc_table(void) {310unsigned i, j, n;311z_crc_t p;312313/* initialize the CRC of bytes tables */314for (i = 0; i < 256; i++) {315p = i;316for (j = 0; j < 8; j++)317p = p & 1 ? (p >> 1) ^ POLY : p >> 1;318crc_table[i] = p;319#ifdef W320crc_big_table[i] = byte_swap(p);321#endif322}323324/* initialize the x^2^n mod p(x) table */325p = (z_crc_t)1 << 30; /* x^1 */326x2n_table[0] = p;327for (n = 1; n < 32; n++)328x2n_table[n] = p = multmodp(p, p);329330#ifdef W331/* initialize the braiding tables -- needs x2n_table[] */332braid(crc_braid_table, crc_braid_big_table, N, W);333#endif334335#ifdef MAKECRCH336{337/*338The crc32.h header file contains tables for both 32-bit and 64-bit339z_word_t's, and so requires a 64-bit type be available. In that case,340z_word_t must be defined to be 64-bits. This code then also generates341and writes out the tables for the case that z_word_t is 32 bits.342*/343#if !defined(W) || W != 8344# error Need a 64-bit integer type in order to generate crc32.h.345#endif346FILE *out;347int k, n;348z_crc_t ltl[8][256];349z_word_t big[8][256];350351out = fopen("crc32.h", "w");352if (out == NULL) return;353354/* write out little-endian CRC table to crc32.h */355fprintf(out,356"/* crc32.h -- tables for rapid CRC calculation\n"357" * Generated automatically by crc32.c\n */\n"358"\n"359"local const z_crc_t FAR crc_table[] = {\n"360" ");361write_table(out, crc_table, 256);362fprintf(out,363"};\n");364365/* write out big-endian CRC table for 64-bit z_word_t to crc32.h */366fprintf(out,367"\n"368"#ifdef W\n"369"\n"370"#if W == 8\n"371"\n"372"local const z_word_t FAR crc_big_table[] = {\n"373" ");374write_table64(out, crc_big_table, 256);375fprintf(out,376"};\n");377378/* write out big-endian CRC table for 32-bit z_word_t to crc32.h */379fprintf(out,380"\n"381"#else /* W == 4 */\n"382"\n"383"local const z_word_t FAR crc_big_table[] = {\n"384" ");385write_table32hi(out, crc_big_table, 256);386fprintf(out,387"};\n"388"\n"389"#endif\n");390391/* write out braid tables for each value of N */392for (n = 1; n <= 6; n++) {393fprintf(out,394"\n"395"#if N == %d\n", n);396397/* compute braid tables for this N and 64-bit word_t */398braid(ltl, big, n, 8);399400/* write out braid tables for 64-bit z_word_t to crc32.h */401fprintf(out,402"\n"403"#if W == 8\n"404"\n"405"local const z_crc_t FAR crc_braid_table[][256] = {\n");406for (k = 0; k < 8; k++) {407fprintf(out, " {");408write_table(out, ltl[k], 256);409fprintf(out, "}%s", k < 7 ? ",\n" : "");410}411fprintf(out,412"};\n"413"\n"414"local const z_word_t FAR crc_braid_big_table[][256] = {\n");415for (k = 0; k < 8; k++) {416fprintf(out, " {");417write_table64(out, big[k], 256);418fprintf(out, "}%s", k < 7 ? ",\n" : "");419}420fprintf(out,421"};\n");422423/* compute braid tables for this N and 32-bit word_t */424braid(ltl, big, n, 4);425426/* write out braid tables for 32-bit z_word_t to crc32.h */427fprintf(out,428"\n"429"#else /* W == 4 */\n"430"\n"431"local const z_crc_t FAR crc_braid_table[][256] = {\n");432for (k = 0; k < 4; k++) {433fprintf(out, " {");434write_table(out, ltl[k], 256);435fprintf(out, "}%s", k < 3 ? ",\n" : "");436}437fprintf(out,438"};\n"439"\n"440"local const z_word_t FAR crc_braid_big_table[][256] = {\n");441for (k = 0; k < 4; k++) {442fprintf(out, " {");443write_table32hi(out, big[k], 256);444fprintf(out, "}%s", k < 3 ? ",\n" : "");445}446fprintf(out,447"};\n"448"\n"449"#endif\n"450"\n"451"#endif\n");452}453fprintf(out,454"\n"455"#endif\n");456457/* write out zeros operator table to crc32.h */458fprintf(out,459"\n"460"local const z_crc_t FAR x2n_table[] = {\n"461" ");462write_table(out, x2n_table, 32);463fprintf(out,464"};\n");465fclose(out);466}467#endif /* MAKECRCH */468}469470#ifdef MAKECRCH471472/*473Write the 32-bit values in table[0..k-1] to out, five per line in474hexadecimal separated by commas.475*/476local void write_table(FILE *out, const z_crc_t FAR *table, int k) {477int n;478479for (n = 0; n < k; n++)480fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",481(unsigned long)(table[n]),482n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));483}484485/*486Write the high 32-bits of each value in table[0..k-1] to out, five per line487in hexadecimal separated by commas.488*/489local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {490int n;491492for (n = 0; n < k; n++)493fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",494(unsigned long)(table[n] >> 32),495n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));496}497498/*499Write the 64-bit values in table[0..k-1] to out, three per line in500hexadecimal separated by commas. This assumes that if there is a 64-bit501type, then there is also a long long integer type, and it is at least 64502bits. If not, then the type cast and format string can be adjusted503accordingly.504*/505local void write_table64(FILE *out, const z_word_t FAR *table, int k) {506int n;507508for (n = 0; n < k; n++)509fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",510(unsigned long long)(table[n]),511n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));512}513514/* Actually do the deed. */515int main(void) {516make_crc_table();517return 0;518}519520#endif /* MAKECRCH */521522#ifdef W523/*524Generate the little and big-endian braid tables for the given n and z_word_t525size w. Each array must have room for w blocks of 256 elements.526*/527local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {528int k;529z_crc_t i, p, q;530for (k = 0; k < w; k++) {531p = x2nmodp((n * w + 3 - k) << 3, 0);532ltl[k][0] = 0;533big[w - 1 - k][0] = 0;534for (i = 1; i < 256; i++) {535ltl[k][i] = q = multmodp(i << 24, p);536big[w - 1 - k][i] = byte_swap(q);537}538}539}540#endif541542#endif /* DYNAMIC_CRC_TABLE */543544/* =========================================================================545* This function can be used by asm versions of crc32(), and to force the546* generation of the CRC tables in a threaded application.547*/548const z_crc_t FAR * ZEXPORT get_crc_table(void) {549#ifdef DYNAMIC_CRC_TABLE550once(&made, make_crc_table);551#endif /* DYNAMIC_CRC_TABLE */552return (const z_crc_t FAR *)crc_table;553}554555/* =========================================================================556* Use ARM machine instructions if available. This will compute the CRC about557* ten times faster than the braided calculation. This code does not check for558* the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will559* only be defined if the compilation specifies an ARM processor architecture560* that has the instructions. For example, compiling with -march=armv8.1-a or561* -march=armv8-a+crc, or -march=native if the compile machine has the crc32562* instructions.563*/564#ifdef ARMCRC32565566/*567Constants empirically determined to maximize speed. These values are from568measurements on a Cortex-A57. Your mileage may vary.569*/570#define Z_BATCH 3990 /* number of words in a batch */571#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */572#define Z_BATCH_MIN 800 /* fewest words in a final batch */573574unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,575z_size_t len) {576z_crc_t val;577z_word_t crc1, crc2;578const z_word_t *word;579z_word_t val0, val1, val2;580z_size_t last, last2, i;581z_size_t num;582583/* Return initial CRC, if requested. */584if (buf == Z_NULL) return 0;585586#ifdef DYNAMIC_CRC_TABLE587once(&made, make_crc_table);588#endif /* DYNAMIC_CRC_TABLE */589590/* Pre-condition the CRC */591crc = (~crc) & 0xffffffff;592593/* Compute the CRC up to a word boundary. */594while (len && ((z_size_t)buf & 7) != 0) {595len--;596val = *buf++;597__asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));598}599600/* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */601word = (z_word_t const *)buf;602num = len >> 3;603len &= 7;604605/* Do three interleaved CRCs to realize the throughput of one crc32x606instruction per cycle. Each CRC is calculated on Z_BATCH words. The607three CRCs are combined into a single CRC after each set of batches. */608while (num >= 3 * Z_BATCH) {609crc1 = 0;610crc2 = 0;611for (i = 0; i < Z_BATCH; i++) {612val0 = word[i];613val1 = word[i + Z_BATCH];614val2 = word[i + 2 * Z_BATCH];615__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));616__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));617__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));618}619word += 3 * Z_BATCH;620num -= 3 * Z_BATCH;621crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;622crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;623}624625/* Do one last smaller batch with the remaining words, if there are enough626to pay for the combination of CRCs. */627last = num / 3;628if (last >= Z_BATCH_MIN) {629last2 = last << 1;630crc1 = 0;631crc2 = 0;632for (i = 0; i < last; i++) {633val0 = word[i];634val1 = word[i + last];635val2 = word[i + last2];636__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));637__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));638__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));639}640word += 3 * last;641num -= 3 * last;642val = x2nmodp(last, 6);643crc = multmodp(val, crc) ^ crc1;644crc = multmodp(val, crc) ^ crc2;645}646647/* Compute the CRC on any remaining words. */648for (i = 0; i < num; i++) {649val0 = word[i];650__asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));651}652word += num;653654/* Complete the CRC on any remaining bytes. */655buf = (const unsigned char FAR *)word;656while (len) {657len--;658val = *buf++;659__asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));660}661662/* Return the CRC, post-conditioned. */663return crc ^ 0xffffffff;664}665666#else667668#ifdef W669670/*671Return the CRC of the W bytes in the word_t data, taking the672least-significant byte of the word as the first byte of data, without any pre673or post conditioning. This is used to combine the CRCs of each braid.674*/675local z_crc_t crc_word(z_word_t data) {676int k;677for (k = 0; k < W; k++)678data = (data >> 8) ^ crc_table[data & 0xff];679return (z_crc_t)data;680}681682local z_word_t crc_word_big(z_word_t data) {683int k;684for (k = 0; k < W; k++)685data = (data << 8) ^686crc_big_table[(data >> ((W - 1) << 3)) & 0xff];687return data;688}689690#endif691692/* ========================================================================= */693unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,694z_size_t len) {695/* Return initial CRC, if requested. */696if (buf == Z_NULL) return 0;697698#ifdef DYNAMIC_CRC_TABLE699once(&made, make_crc_table);700#endif /* DYNAMIC_CRC_TABLE */701702/* Pre-condition the CRC */703crc = (~crc) & 0xffffffff;704705#ifdef W706707/* If provided enough bytes, do a braided CRC calculation. */708if (len >= N * W + W - 1) {709z_size_t blks;710z_word_t const *words;711unsigned endian;712int k;713714/* Compute the CRC up to a z_word_t boundary. */715while (len && ((z_size_t)buf & (W - 1)) != 0) {716len--;717crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];718}719720/* Compute the CRC on as many N z_word_t blocks as are available. */721blks = len / (N * W);722len -= blks * N * W;723words = (z_word_t const *)buf;724725/* Do endian check at execution time instead of compile time, since ARM726processors can change the endianness at execution time. If the727compiler knows what the endianness will be, it can optimize out the728check and the unused branch. */729endian = 1;730if (*(unsigned char *)&endian) {731/* Little endian. */732733z_crc_t crc0;734z_word_t word0;735#if N > 1736z_crc_t crc1;737z_word_t word1;738#if N > 2739z_crc_t crc2;740z_word_t word2;741#if N > 3742z_crc_t crc3;743z_word_t word3;744#if N > 4745z_crc_t crc4;746z_word_t word4;747#if N > 5748z_crc_t crc5;749z_word_t word5;750#endif751#endif752#endif753#endif754#endif755756/* Initialize the CRC for each braid. */757crc0 = crc;758#if N > 1759crc1 = 0;760#if N > 2761crc2 = 0;762#if N > 3763crc3 = 0;764#if N > 4765crc4 = 0;766#if N > 5767crc5 = 0;768#endif769#endif770#endif771#endif772#endif773774/*775Process the first blks-1 blocks, computing the CRCs on each braid776independently.777*/778while (--blks) {779/* Load the word for each braid into registers. */780word0 = crc0 ^ words[0];781#if N > 1782word1 = crc1 ^ words[1];783#if N > 2784word2 = crc2 ^ words[2];785#if N > 3786word3 = crc3 ^ words[3];787#if N > 4788word4 = crc4 ^ words[4];789#if N > 5790word5 = crc5 ^ words[5];791#endif792#endif793#endif794#endif795#endif796words += N;797798/* Compute and update the CRC for each word. The loop should799get unrolled. */800crc0 = crc_braid_table[0][word0 & 0xff];801#if N > 1802crc1 = crc_braid_table[0][word1 & 0xff];803#if N > 2804crc2 = crc_braid_table[0][word2 & 0xff];805#if N > 3806crc3 = crc_braid_table[0][word3 & 0xff];807#if N > 4808crc4 = crc_braid_table[0][word4 & 0xff];809#if N > 5810crc5 = crc_braid_table[0][word5 & 0xff];811#endif812#endif813#endif814#endif815#endif816for (k = 1; k < W; k++) {817crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];818#if N > 1819crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];820#if N > 2821crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];822#if N > 3823crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];824#if N > 4825crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];826#if N > 5827crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];828#endif829#endif830#endif831#endif832#endif833}834}835836/*837Process the last block, combining the CRCs of the N braids at the838same time.839*/840crc = crc_word(crc0 ^ words[0]);841#if N > 1842crc = crc_word(crc1 ^ words[1] ^ crc);843#if N > 2844crc = crc_word(crc2 ^ words[2] ^ crc);845#if N > 3846crc = crc_word(crc3 ^ words[3] ^ crc);847#if N > 4848crc = crc_word(crc4 ^ words[4] ^ crc);849#if N > 5850crc = crc_word(crc5 ^ words[5] ^ crc);851#endif852#endif853#endif854#endif855#endif856words += N;857}858else {859/* Big endian. */860861z_word_t crc0, word0, comb;862#if N > 1863z_word_t crc1, word1;864#if N > 2865z_word_t crc2, word2;866#if N > 3867z_word_t crc3, word3;868#if N > 4869z_word_t crc4, word4;870#if N > 5871z_word_t crc5, word5;872#endif873#endif874#endif875#endif876#endif877878/* Initialize the CRC for each braid. */879crc0 = byte_swap(crc);880#if N > 1881crc1 = 0;882#if N > 2883crc2 = 0;884#if N > 3885crc3 = 0;886#if N > 4887crc4 = 0;888#if N > 5889crc5 = 0;890#endif891#endif892#endif893#endif894#endif895896/*897Process the first blks-1 blocks, computing the CRCs on each braid898independently.899*/900while (--blks) {901/* Load the word for each braid into registers. */902word0 = crc0 ^ words[0];903#if N > 1904word1 = crc1 ^ words[1];905#if N > 2906word2 = crc2 ^ words[2];907#if N > 3908word3 = crc3 ^ words[3];909#if N > 4910word4 = crc4 ^ words[4];911#if N > 5912word5 = crc5 ^ words[5];913#endif914#endif915#endif916#endif917#endif918words += N;919920/* Compute and update the CRC for each word. The loop should921get unrolled. */922crc0 = crc_braid_big_table[0][word0 & 0xff];923#if N > 1924crc1 = crc_braid_big_table[0][word1 & 0xff];925#if N > 2926crc2 = crc_braid_big_table[0][word2 & 0xff];927#if N > 3928crc3 = crc_braid_big_table[0][word3 & 0xff];929#if N > 4930crc4 = crc_braid_big_table[0][word4 & 0xff];931#if N > 5932crc5 = crc_braid_big_table[0][word5 & 0xff];933#endif934#endif935#endif936#endif937#endif938for (k = 1; k < W; k++) {939crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];940#if N > 1941crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];942#if N > 2943crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];944#if N > 3945crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];946#if N > 4947crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];948#if N > 5949crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];950#endif951#endif952#endif953#endif954#endif955}956}957958/*959Process the last block, combining the CRCs of the N braids at the960same time.961*/962comb = crc_word_big(crc0 ^ words[0]);963#if N > 1964comb = crc_word_big(crc1 ^ words[1] ^ comb);965#if N > 2966comb = crc_word_big(crc2 ^ words[2] ^ comb);967#if N > 3968comb = crc_word_big(crc3 ^ words[3] ^ comb);969#if N > 4970comb = crc_word_big(crc4 ^ words[4] ^ comb);971#if N > 5972comb = crc_word_big(crc5 ^ words[5] ^ comb);973#endif974#endif975#endif976#endif977#endif978words += N;979crc = byte_swap(comb);980}981982/*983Update the pointer to the remaining bytes to process.984*/985buf = (unsigned char const *)words;986}987988#endif /* W */989990/* Complete the computation of the CRC on any remaining bytes. */991while (len >= 8) {992len -= 8;993crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];994crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];995crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];996crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];997crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];998crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];999crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];1000crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];1001}1002while (len) {1003len--;1004crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];1005}10061007/* Return the CRC, post-conditioned. */1008return crc ^ 0xffffffff;1009}10101011#endif10121013/* ========================================================================= */1014unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,1015uInt len) {1016return crc32_z(crc, buf, len);1017}10181019/* ========================================================================= */1020uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {1021#ifdef DYNAMIC_CRC_TABLE1022once(&made, make_crc_table);1023#endif /* DYNAMIC_CRC_TABLE */1024return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);1025}10261027/* ========================================================================= */1028uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {1029return crc32_combine64(crc1, crc2, (z_off64_t)len2);1030}10311032/* ========================================================================= */1033uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {1034#ifdef DYNAMIC_CRC_TABLE1035once(&made, make_crc_table);1036#endif /* DYNAMIC_CRC_TABLE */1037return x2nmodp(len2, 3);1038}10391040/* ========================================================================= */1041uLong ZEXPORT crc32_combine_gen(z_off_t len2) {1042return crc32_combine_gen64((z_off64_t)len2);1043}10441045/* ========================================================================= */1046uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {1047return multmodp(op, crc1) ^ (crc2 & 0xffffffff);1048}104910501051