Path: blob/main/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aesopt.h
48775 views
// SPDX-License-Identifier: Brian-Gladman-3-Clause1/*2* ---------------------------------------------------------------------------3* Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.4*5* LICENSE TERMS6*7* The free distribution and use of this software is allowed (with or without8* changes) provided that:9*10* 1. source code distributions include the above copyright notice, this11* list of conditions and the following disclaimer;12*13* 2. binary distributions include the above copyright notice, this list14* of conditions and the following disclaimer in their documentation;15*16* 3. the name of the copyright holder is not used to endorse products17* built using this software without specific written permission.18*19* DISCLAIMER20*21* This software is provided 'as is' with no explicit or implied warranties22* in respect of its properties, including, but not limited to, correctness23* and/or fitness for purpose.24* ---------------------------------------------------------------------------25* Issue Date: 20/12/200726*27* This file contains the compilation options for AES (Rijndael) and code28* that is common across encryption, key scheduling and table generation.29*30* OPERATION31*32* These source code files implement the AES algorithm Rijndael designed by33* Joan Daemen and Vincent Rijmen. This version is designed for the standard34* block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 2435* and 32 bytes).36*37* This version is designed for flexibility and speed using operations on38* 32-bit words rather than operations on bytes. It can be compiled with39* either big or little endian internal byte order but is faster when the40* native byte order for the processor is used.41*42* THE CIPHER INTERFACE43*44* The cipher interface is implemented as an array of bytes in which lower45* AES bit sequence indexes map to higher numeric significance within bytes.46*/4748/*49* OpenSolaris changes50* 1. Added __cplusplus and _AESTAB_H header guards51* 2. Added header files sys/types.h and aes_impl.h52* 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C53* 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER54* from brg_endian.h55* 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT56* 6. Changed uint_8t and uint_32t to uint8_t and uint32_t57* 7. Defined aes_sw32 as htonl() for byte swapping58* 8. Cstyled and hdrchk code59*60*/6162#ifndef _AESOPT_H63#define _AESOPT_H6465#ifdef __cplusplus66extern "C" {67#endif6869#include <sys/zfs_context.h>70#include <aes/aes_impl.h>7172/* SUPPORT FEATURES */73#define AES_ENCRYPT /* if support for encryption is needed */74#define AES_DECRYPT /* if support for decryption is needed */7576/* PLATFORM-SPECIFIC FEATURES */77#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */78#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */79#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN80#define AES_REV_DKS /* define to reverse decryption key schedule */818283/*84* CONFIGURATION - THE USE OF DEFINES85* Later in this section there are a number of defines that control the86* operation of the code. In each section, the purpose of each define is87* explained so that the relevant form can be included or excluded by88* setting either 1's or 0's respectively on the branches of the related89* #if clauses. The following local defines should not be changed.90*/9192#define ENCRYPTION_IN_C 193#define DECRYPTION_IN_C 294#define ENC_KEYING_IN_C 495#define DEC_KEYING_IN_C 89697#define NO_TABLES 098#define ONE_TABLE 199#define FOUR_TABLES 4100#define NONE 0101#define PARTIAL 1102#define FULL 2103104/* --- START OF USER CONFIGURED OPTIONS --- */105106/*107* 1. BYTE ORDER WITHIN 32 BIT WORDS108*109* The fundamental data processing units in Rijndael are 8-bit bytes. The110* input, output and key input are all enumerated arrays of bytes in which111* bytes are numbered starting at zero and increasing to one less than the112* number of bytes in the array in question. This enumeration is only used113* for naming bytes and does not imply any adjacency or order relationship114* from one byte to another. When these inputs and outputs are considered115* as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to116* byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.117* In this implementation bits are numbered from 0 to 7 starting at the118* numerically least significant end of each byte. Bit n represents 2^n.119*120* However, Rijndael can be implemented more efficiently using 32-bit121* words by packing bytes into words so that bytes 4*n to 4*n+3 are placed122* into word[n]. While in principle these bytes can be assembled into words123* in any positions, this implementation only supports the two formats in124* which bytes in adjacent positions within words also have adjacent byte125* numbers. This order is called big-endian if the lowest numbered bytes126* in words have the highest numeric significance and little-endian if the127* opposite applies.128*129* This code can work in either order irrespective of the order used by the130* machine on which it runs. Normally the internal byte order will be set131* to the order of the processor on which the code is to be run but this132* define can be used to reverse this in special situations133*134* WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.135* This define will hence be redefined later (in section 4) if necessary136*/137138#if 1139#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER140#elif 0141#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN142#elif 0143#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN144#else145#error The algorithm byte order is not defined146#endif147148/* 2. VIA ACE SUPPORT */149150#if defined(__GNUC__) && defined(__i386__) || \151defined(_WIN32) && defined(_M_IX86) && \152!(defined(_WIN64) || defined(_WIN32_WCE) || \153defined(_MSC_VER) && (_MSC_VER <= 800))154#define VIA_ACE_POSSIBLE155#endif156157/*158* Define this option if support for the VIA ACE is required. This uses159* inline assembler instructions and is only implemented for the Microsoft,160* Intel and GCC compilers. If VIA ACE is known to be present, then defining161* ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption162* code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if163* it is detected (both present and enabled) but the normal AES code will164* also be present.165*166* When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte167* aligned; other input/output buffers do not need to be 16 byte aligned168* but there are very large performance gains if this can be arranged.169* VIA ACE also requires the decryption key schedule to be in reverse170* order (which later checks below ensure).171*/172173/* VIA ACE is not used here for OpenSolaris: */174#undef VIA_ACE_POSSIBLE175#undef ASSUME_VIA_ACE_PRESENT176177#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)178#define USE_VIA_ACE_IF_PRESENT179#endif180181#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)182#define ASSUME_VIA_ACE_PRESENT183#endif184185186/*187* 3. ASSEMBLER SUPPORT188*189* This define (which can be on the command line) enables the use of the190* assembler code routines for encryption, decryption and key scheduling191* as follows:192*193* ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for194* encryption and decryption and but with key scheduling in C195* ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for196* encryption, decryption and key scheduling197* ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for198* encryption and decryption and but with key scheduling in C199* ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for200* encryption and decryption and but with key scheduling in C201*202* Change one 'if 0' below to 'if 1' to select the version or define203* as a compilation option.204*/205206#if 0 && !defined(ASM_X86_V1C)207#define ASM_X86_V1C208#elif 0 && !defined(ASM_X86_V2)209#define ASM_X86_V2210#elif 0 && !defined(ASM_X86_V2C)211#define ASM_X86_V2C212#elif 1 && !defined(ASM_AMD64_C)213#define ASM_AMD64_C214#endif215216#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \217!defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \218!defined(__amd64)219#error Assembler code is only available for x86 and AMD64 systems220#endif221222/*223* 4. FAST INPUT/OUTPUT OPERATIONS.224*225* On some machines it is possible to improve speed by transferring the226* bytes in the input and output arrays to and from the internal 32-bit227* variables by addressing these arrays as if they are arrays of 32-bit228* words. On some machines this will always be possible but there may229* be a large performance penalty if the byte arrays are not aligned on230* the normal word boundaries. On other machines this technique will231* lead to memory access errors when such 32-bit word accesses are not232* properly aligned. The option SAFE_IO avoids such problems but will233* often be slower on those machines that support misaligned access234* (especially so if care is taken to align the input and output byte235* arrays on 32-bit word boundaries). If SAFE_IO is not defined it is236* assumed that access to byte arrays as if they are arrays of 32-bit237* words will not cause problems when such accesses are misaligned.238*/239#if 1 && !defined(_MSC_VER)240#define SAFE_IO241#endif242243/*244* 5. LOOP UNROLLING245*246* The code for encryption and decryption cycles through a number of rounds247* that can be implemented either in a loop or by expanding the code into a248* long sequence of instructions, the latter producing a larger program but249* one that will often be much faster. The latter is called loop unrolling.250* There are also potential speed advantages in expanding two iterations in251* a loop with half the number of iterations, which is called partial loop252* unrolling. The following options allow partial or full loop unrolling253* to be set independently for encryption and decryption254*/255#if 1256#define ENC_UNROLL FULL257#elif 0258#define ENC_UNROLL PARTIAL259#else260#define ENC_UNROLL NONE261#endif262263#if 1264#define DEC_UNROLL FULL265#elif 0266#define DEC_UNROLL PARTIAL267#else268#define DEC_UNROLL NONE269#endif270271#if 1272#define ENC_KS_UNROLL273#endif274275#if 1276#define DEC_KS_UNROLL277#endif278279/*280* 6. FAST FINITE FIELD OPERATIONS281*282* If this section is included, tables are used to provide faster finite283* field arithmetic. This has no effect if FIXED_TABLES is defined.284*/285#if 1286#define FF_TABLES287#endif288289/*290* 7. INTERNAL STATE VARIABLE FORMAT291*292* The internal state of Rijndael is stored in a number of local 32-bit293* word variables which can be defined either as an array or as individual294* names variables. Include this section if you want to store these local295* variables in arrays. Otherwise individual local variables will be used.296*/297#if 1298#define ARRAYS299#endif300301/*302* 8. FIXED OR DYNAMIC TABLES303*304* When this section is included the tables used by the code are compiled305* statically into the binary file. Otherwise the subroutine aes_init()306* must be called to compute them before the code is first used.307*/308#if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))309#define FIXED_TABLES310#endif311312/*313* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES314*315* In some systems it is better to mask longer values to extract bytes316* rather than using a cast. This option allows this choice.317*/318#if 0319#define to_byte(x) ((uint8_t)(x))320#else321#define to_byte(x) ((x) & 0xff)322#endif323324/*325* 10. TABLE ALIGNMENT326*327* On some systems speed will be improved by aligning the AES large lookup328* tables on particular boundaries. This define should be set to a power of329* two giving the desired alignment. It can be left undefined if alignment330* is not needed. This option is specific to the Microsoft VC++ compiler -331* it seems to sometimes cause trouble for the VC++ version 6 compiler.332*/333334#if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)335#define TABLE_ALIGN 32336#endif337338/*339* 11. REDUCE CODE AND TABLE SIZE340*341* This replaces some expanded macros with function calls if AES_ASM_V2 or342* AES_ASM_V2C are defined343*/344345#if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))346#define REDUCE_CODE_SIZE347#endif348349/*350* 12. TABLE OPTIONS351*352* This cipher proceeds by repeating in a number of cycles known as rounds353* which are implemented by a round function which is optionally be speeded354* up using tables. The basic tables are 256 32-bit words, with either355* one or four tables being required for each round function depending on356* how much speed is required. Encryption and decryption round functions357* are different and the last encryption and decryption round functions are358* different again making four different round functions in all.359*360* This means that:361* 1. Normal encryption and decryption rounds can each use either 0, 1362* or 4 tables and table spaces of 0, 1024 or 4096 bytes each.363* 2. The last encryption and decryption rounds can also use either 0, 1364* or 4 tables and table spaces of 0, 1024 or 4096 bytes each.365*366* Include or exclude the appropriate definitions below to set the number367* of tables used by this implementation.368*/369370#if 1 /* set tables for the normal encryption round */371#define ENC_ROUND FOUR_TABLES372#elif 0373#define ENC_ROUND ONE_TABLE374#else375#define ENC_ROUND NO_TABLES376#endif377378#if 1 /* set tables for the last encryption round */379#define LAST_ENC_ROUND FOUR_TABLES380#elif 0381#define LAST_ENC_ROUND ONE_TABLE382#else383#define LAST_ENC_ROUND NO_TABLES384#endif385386#if 1 /* set tables for the normal decryption round */387#define DEC_ROUND FOUR_TABLES388#elif 0389#define DEC_ROUND ONE_TABLE390#else391#define DEC_ROUND NO_TABLES392#endif393394#if 1 /* set tables for the last decryption round */395#define LAST_DEC_ROUND FOUR_TABLES396#elif 0397#define LAST_DEC_ROUND ONE_TABLE398#else399#define LAST_DEC_ROUND NO_TABLES400#endif401402/*403* The decryption key schedule can be speeded up with tables in the same404* way that the round functions can. Include or exclude the following405* defines to set this requirement.406*/407#if 1408#define KEY_SCHED FOUR_TABLES409#elif 0410#define KEY_SCHED ONE_TABLE411#else412#define KEY_SCHED NO_TABLES413#endif414415/* ---- END OF USER CONFIGURED OPTIONS ---- */416417/* VIA ACE support is only available for VC++ and GCC */418419#if !defined(_MSC_VER) && !defined(__GNUC__)420#if defined(ASSUME_VIA_ACE_PRESENT)421#undef ASSUME_VIA_ACE_PRESENT422#endif423#if defined(USE_VIA_ACE_IF_PRESENT)424#undef USE_VIA_ACE_IF_PRESENT425#endif426#endif427428#if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)429#define USE_VIA_ACE_IF_PRESENT430#endif431432#if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)433#define AES_REV_DKS434#endif435436/* Assembler support requires the use of platform byte order */437438#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \439(ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)440#undef ALGORITHM_BYTE_ORDER441#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER442#endif443444/*445* In this implementation the columns of the state array are each held in446* 32-bit words. The state array can be held in various ways: in an array447* of words, in a number of individual word variables or in a number of448* processor registers. The following define maps a variable name x and449* a column number c to the way the state array variable is to be held.450* The first define below maps the state into an array x[c] whereas the451* second form maps the state into a number of individual variables x0,452* x1, etc. Another form could map individual state columns to machine453* register names.454*/455456#if defined(ARRAYS)457#define s(x, c) x[c]458#else459#define s(x, c) x##c460#endif461462/*463* This implementation provides subroutines for encryption, decryption464* and for setting the three key lengths (separately) for encryption465* and decryption. Since not all functions are needed, masks are set466* up here to determine which will be implemented in C467*/468469#if !defined(AES_ENCRYPT)470#define EFUNCS_IN_C 0471#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \472defined(ASM_X86_V2C) || defined(ASM_AMD64_C)473#define EFUNCS_IN_C ENC_KEYING_IN_C474#elif !defined(ASM_X86_V2)475#define EFUNCS_IN_C (ENCRYPTION_IN_C | ENC_KEYING_IN_C)476#else477#define EFUNCS_IN_C 0478#endif479480#if !defined(AES_DECRYPT)481#define DFUNCS_IN_C 0482#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \483defined(ASM_X86_V2C) || defined(ASM_AMD64_C)484#define DFUNCS_IN_C DEC_KEYING_IN_C485#elif !defined(ASM_X86_V2)486#define DFUNCS_IN_C (DECRYPTION_IN_C | DEC_KEYING_IN_C)487#else488#define DFUNCS_IN_C 0489#endif490491#define FUNCS_IN_C (EFUNCS_IN_C | DFUNCS_IN_C)492493/* END OF CONFIGURATION OPTIONS */494495/* Disable or report errors on some combinations of options */496497#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES498#undef LAST_ENC_ROUND499#define LAST_ENC_ROUND NO_TABLES500#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES501#undef LAST_ENC_ROUND502#define LAST_ENC_ROUND ONE_TABLE503#endif504505#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE506#undef ENC_UNROLL507#define ENC_UNROLL NONE508#endif509510#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES511#undef LAST_DEC_ROUND512#define LAST_DEC_ROUND NO_TABLES513#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES514#undef LAST_DEC_ROUND515#define LAST_DEC_ROUND ONE_TABLE516#endif517518#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE519#undef DEC_UNROLL520#define DEC_UNROLL NONE521#endif522523#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)524#define aes_sw32 htonl525#elif defined(bswap32)526#define aes_sw32 bswap32527#elif defined(bswap_32)528#define aes_sw32 bswap_32529#else530#define brot(x, n) (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))531#define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))532#endif533534535/*536* upr(x, n): rotates bytes within words by n positions, moving bytes to537* higher index positions with wrap around into low positions538* ups(x, n): moves bytes by n positions to higher index positions in539* words but without wrap around540* bval(x, n): extracts a byte from a word541*542* WARNING: The definitions given here are intended only for use with543* unsigned variables and with shift counts that are compile544* time constants545*/546547#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)548#define upr(x, n) (((uint32_t)(x) << (8 * (n))) | \549((uint32_t)(x) >> (32 - 8 * (n))))550#define ups(x, n) ((uint32_t)(x) << (8 * (n)))551#define bval(x, n) to_byte((x) >> (8 * (n)))552#define bytes2word(b0, b1, b2, b3) \553(((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \554((uint32_t)(b1) << 8) | (b0))555#endif556557#if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)558#define upr(x, n) (((uint32_t)(x) >> (8 * (n))) | \559((uint32_t)(x) << (32 - 8 * (n))))560#define ups(x, n) ((uint32_t)(x) >> (8 * (n)))561#define bval(x, n) to_byte((x) >> (24 - 8 * (n)))562#define bytes2word(b0, b1, b2, b3) \563(((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \564((uint32_t)(b2) << 8) | (b3))565#endif566567#if defined(SAFE_IO)568#define word_in(x, c) bytes2word(((const uint8_t *)(x) + 4 * c)[0], \569((const uint8_t *)(x) + 4 * c)[1], \570((const uint8_t *)(x) + 4 * c)[2], \571((const uint8_t *)(x) + 4 * c)[3])572#define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \573((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \574((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \575((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }576#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)577#define word_in(x, c) (*((uint32_t *)(x) + (c)))578#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))579#else580#define word_in(x, c) aes_sw32(*((uint32_t *)(x) + (c)))581#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))582#endif583584/* the finite field modular polynomial and elements */585586#define WPOLY 0x011b587#define BPOLY 0x1b588589/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */590591#define m1 0x80808080592#define m2 0x7f7f7f7f593#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))594595/*596* The following defines provide alternative definitions of gf_mulx that might597* give improved performance if a fast 32-bit multiply is not available. Note598* that a temporary variable u needs to be defined where gf_mulx is used.599*600* #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \601* ((u >> 3) | (u >> 6))602* #define m4 (0x01010101 * BPOLY)603* #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \604* & m4)605*/606607/* Work out which tables are needed for the different options */608609#if defined(ASM_X86_V1C)610#if defined(ENC_ROUND)611#undef ENC_ROUND612#endif613#define ENC_ROUND FOUR_TABLES614#if defined(LAST_ENC_ROUND)615#undef LAST_ENC_ROUND616#endif617#define LAST_ENC_ROUND FOUR_TABLES618#if defined(DEC_ROUND)619#undef DEC_ROUND620#endif621#define DEC_ROUND FOUR_TABLES622#if defined(LAST_DEC_ROUND)623#undef LAST_DEC_ROUND624#endif625#define LAST_DEC_ROUND FOUR_TABLES626#if defined(KEY_SCHED)627#undef KEY_SCHED628#define KEY_SCHED FOUR_TABLES629#endif630#endif631632#if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)633#if ENC_ROUND == ONE_TABLE634#define FT1_SET635#elif ENC_ROUND == FOUR_TABLES636#define FT4_SET637#else638#define SBX_SET639#endif640#if LAST_ENC_ROUND == ONE_TABLE641#define FL1_SET642#elif LAST_ENC_ROUND == FOUR_TABLES643#define FL4_SET644#elif !defined(SBX_SET)645#define SBX_SET646#endif647#endif648649#if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)650#if DEC_ROUND == ONE_TABLE651#define IT1_SET652#elif DEC_ROUND == FOUR_TABLES653#define IT4_SET654#else655#define ISB_SET656#endif657#if LAST_DEC_ROUND == ONE_TABLE658#define IL1_SET659#elif LAST_DEC_ROUND == FOUR_TABLES660#define IL4_SET661#elif !defined(ISB_SET)662#define ISB_SET663#endif664#endif665666667#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \668defined(ASM_X86_V2C)))669#if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))670#if KEY_SCHED == ONE_TABLE671#if !defined(FL1_SET) && !defined(FL4_SET)672#define LS1_SET673#endif674#elif KEY_SCHED == FOUR_TABLES675#if !defined(FL4_SET)676#define LS4_SET677#endif678#elif !defined(SBX_SET)679#define SBX_SET680#endif681#endif682#if (FUNCS_IN_C & DEC_KEYING_IN_C)683#if KEY_SCHED == ONE_TABLE684#define IM1_SET685#elif KEY_SCHED == FOUR_TABLES686#define IM4_SET687#elif !defined(SBX_SET)688#define SBX_SET689#endif690#endif691#endif692693/* generic definitions of Rijndael macros that use tables */694695#define no_table(x, box, vf, rf, c) bytes2word(\696box[bval(vf(x, 0, c), rf(0, c))], \697box[bval(vf(x, 1, c), rf(1, c))], \698box[bval(vf(x, 2, c), rf(2, c))], \699box[bval(vf(x, 3, c), rf(3, c))])700701#define one_table(x, op, tab, vf, rf, c) \702(tab[bval(vf(x, 0, c), rf(0, c))] \703^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \704^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \705^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))706707#define four_tables(x, tab, vf, rf, c) \708(tab[0][bval(vf(x, 0, c), rf(0, c))] \709^ tab[1][bval(vf(x, 1, c), rf(1, c))] \710^ tab[2][bval(vf(x, 2, c), rf(2, c))] \711^ tab[3][bval(vf(x, 3, c), rf(3, c))])712713#define vf1(x, r, c) (x)714#define rf1(r, c) (r)715#define rf2(r, c) ((8+r-c)&3)716717/*718* Perform forward and inverse column mix operation on four bytes in long word719* x in parallel. NOTE: x must be a simple variable, NOT an expression in720* these macros.721*/722723#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \724defined(ASM_X86_V2C)))725726#if defined(FM4_SET) /* not currently used */727#define fwd_mcol(x) four_tables(x, t_use(f, m), vf1, rf1, 0)728#elif defined(FM1_SET) /* not currently used */729#define fwd_mcol(x) one_table(x, upr, t_use(f, m), vf1, rf1, 0)730#else731#define dec_fmvars uint32_t g2732#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \733upr((x), 2) ^ upr((x), 1))734#endif735736#if defined(IM4_SET)737#define inv_mcol(x) four_tables(x, t_use(i, m), vf1, rf1, 0)738#elif defined(IM1_SET)739#define inv_mcol(x) one_table(x, upr, t_use(i, m), vf1, rf1, 0)740#else741#define dec_imvars uint32_t g2, g4, g9742#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \743(x) ^ gf_mulx(g4), g4 ^= g9, \744(x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \745upr(g4, 2) ^ upr(g9, 1))746#endif747748#if defined(FL4_SET)749#define ls_box(x, c) four_tables(x, t_use(f, l), vf1, rf2, c)750#elif defined(LS4_SET)751#define ls_box(x, c) four_tables(x, t_use(l, s), vf1, rf2, c)752#elif defined(FL1_SET)753#define ls_box(x, c) one_table(x, upr, t_use(f, l), vf1, rf2, c)754#elif defined(LS1_SET)755#define ls_box(x, c) one_table(x, upr, t_use(l, s), vf1, rf2, c)756#else757#define ls_box(x, c) no_table(x, t_use(s, box), vf1, rf2, c)758#endif759760#endif761762#if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)763#define ISB_SET764#endif765766#ifdef __cplusplus767}768#endif769770#endif /* _AESOPT_H */771772773