/* Path: blob/master/sha3/sph_keccak.c */
/* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */1/*2* Keccak implementation.3*4* ==========================(LICENSE BEGIN)============================5*6* Copyright (c) 2007-2010 Projet RNRT SAPHIR7*8* Permission is hereby granted, free of charge, to any person obtaining9* a copy of this software and associated documentation files (the10* "Software"), to deal in the Software without restriction, including11* without limitation the rights to use, copy, modify, merge, publish,12* distribute, sublicense, and/or sell copies of the Software, and to13* permit persons to whom the Software is furnished to do so, subject to14* the following conditions:15*16* The above copyright notice and this permission notice shall be17* included in all copies or substantial portions of the Software.18*19* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,20* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF21* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.22* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY23* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,24* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE25* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.26*27* ===========================(LICENSE END)=============================28*29* @author Thomas Pornin <[email protected]>30*/3132#include <stddef.h>33#include <string.h>3435#include "sph_keccak.h"3637#ifdef __cplusplus38extern "C"{39#endif4041/*42* Parameters:43*44* SPH_KECCAK_64 use a 64-bit type45* SPH_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll)46* SPH_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only)47* SPH_KECCAK_NOCOPY do not copy the state into local variables48*49* If there is no usable 64-bit type, the code automatically switches50* back to the 32-bit implementation.51*52* Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L153* code cache), a PowerPC (G3, 32 kB 
L1 code cache), an ARM920T core54* (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,55* 8 kB L1 code cache), seem to show that the following are optimal:56*57* -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,58* do not copy the state; unrolling 2, 6 or all rounds also provides59* near-optimal performance.60* -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,61* interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds62* also provides near-optimal performance.63* -- PowerPC: use the 64-bit implementation, unroll 8 rounds,64* copy the state. Unrolling 4 or 6 rounds is near-optimal.65* -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,66* copy the state.67* -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy68* the state. Unrolling only 1 round is also near-optimal.69*70* Also, interleaving does not always yield actual improvements when71* using a 32-bit implementation; in particular when the architecture72* does not offer a native rotation opcode (interleaving replaces one73* 64-bit rotation with two 32-bit rotations, which is a gain only if74* there is a native 32-bit rotation opcode and not a native 64-bit75* rotation opcode; also, interleaving implies a small overhead when76* processing input words).77*78* To sum up:79* -- when possible, use the 64-bit code80* -- exception: on 32-bit x86, use 32-bit code81* -- when using 32-bit code, use interleaving82* -- copy the state, except on x8683* -- unroll 8 rounds on "big" machine, 2 rounds on "small" machines84*/8586#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK87#define SPH_SMALL_FOOTPRINT_KECCAK 188#endif8990/*91* By default, we select the 64-bit implementation if a 64-bit type92* is available, unless a 32-bit x86 is detected.93*/94#if !defined SPH_KECCAK_64 && SPH_64 \95&& !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC)96#define SPH_KECCAK_64 197#endif9899/*100* If using a 32-bit implementation, we prefer to 
interleave.101*/102#if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE103#define SPH_KECCAK_INTERLEAVE 1104#endif105106/*107* Unroll 8 rounds on big systems, 2 rounds on small systems.108*/109#ifndef SPH_KECCAK_UNROLL110#if SPH_SMALL_FOOTPRINT_KECCAK111#define SPH_KECCAK_UNROLL 2112#else113#define SPH_KECCAK_UNROLL 8114#endif115#endif116117/*118* We do not want to copy the state to local variables on x86 (32-bit119* and 64-bit alike).120*/121#ifndef SPH_KECCAK_NOCOPY122#if defined __i386__ || defined __x86_64 || SPH_I386_MSVC || SPH_I386_GCC123#define SPH_KECCAK_NOCOPY 1124#else125#define SPH_KECCAK_NOCOPY 0126#endif127#endif128129#ifdef _MSC_VER130#pragma warning (disable: 4146)131#endif132133#if SPH_KECCAK_64134135static const sph_u64 RC[] = {136SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),137SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),138SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),139SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),140SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),141SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),142SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),143SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),144SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),145SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),146SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),147SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)148};149150#if SPH_KECCAK_NOCOPY151152#define a00 (kc->u.wide[ 0])153#define a10 (kc->u.wide[ 1])154#define a20 (kc->u.wide[ 2])155#define a30 (kc->u.wide[ 3])156#define a40 (kc->u.wide[ 4])157#define a01 (kc->u.wide[ 5])158#define a11 (kc->u.wide[ 6])159#define a21 (kc->u.wide[ 7])160#define a31 (kc->u.wide[ 8])161#define a41 (kc->u.wide[ 9])162#define a02 (kc->u.wide[10])163#define a12 (kc->u.wide[11])164#define a22 (kc->u.wide[12])165#define a32 (kc->u.wide[13])166#define a42 (kc->u.wide[14])167#define a03 
(kc->u.wide[15])168#define a13 (kc->u.wide[16])169#define a23 (kc->u.wide[17])170#define a33 (kc->u.wide[18])171#define a43 (kc->u.wide[19])172#define a04 (kc->u.wide[20])173#define a14 (kc->u.wide[21])174#define a24 (kc->u.wide[22])175#define a34 (kc->u.wide[23])176#define a44 (kc->u.wide[24])177178#define DECL_STATE179#define READ_STATE(sc)180#define WRITE_STATE(sc)181182#define INPUT_BUF(size) do { \183size_t j; \184for (j = 0; j < (size); j += 8) { \185kc->u.wide[j >> 3] ^= sph_dec64le_aligned(buf + j); \186} \187} while (0)188189#define INPUT_BUF144 INPUT_BUF(144)190#define INPUT_BUF136 INPUT_BUF(136)191#define INPUT_BUF104 INPUT_BUF(104)192#define INPUT_BUF72 INPUT_BUF(72)193194#else195196#define DECL_STATE \197sph_u64 a00, a01, a02, a03, a04; \198sph_u64 a10, a11, a12, a13, a14; \199sph_u64 a20, a21, a22, a23, a24; \200sph_u64 a30, a31, a32, a33, a34; \201sph_u64 a40, a41, a42, a43, a44;202203#define READ_STATE(state) do { \204a00 = (state)->u.wide[ 0]; \205a10 = (state)->u.wide[ 1]; \206a20 = (state)->u.wide[ 2]; \207a30 = (state)->u.wide[ 3]; \208a40 = (state)->u.wide[ 4]; \209a01 = (state)->u.wide[ 5]; \210a11 = (state)->u.wide[ 6]; \211a21 = (state)->u.wide[ 7]; \212a31 = (state)->u.wide[ 8]; \213a41 = (state)->u.wide[ 9]; \214a02 = (state)->u.wide[10]; \215a12 = (state)->u.wide[11]; \216a22 = (state)->u.wide[12]; \217a32 = (state)->u.wide[13]; \218a42 = (state)->u.wide[14]; \219a03 = (state)->u.wide[15]; \220a13 = (state)->u.wide[16]; \221a23 = (state)->u.wide[17]; \222a33 = (state)->u.wide[18]; \223a43 = (state)->u.wide[19]; \224a04 = (state)->u.wide[20]; \225a14 = (state)->u.wide[21]; \226a24 = (state)->u.wide[22]; \227a34 = (state)->u.wide[23]; \228a44 = (state)->u.wide[24]; \229} while (0)230231#define WRITE_STATE(state) do { \232(state)->u.wide[ 0] = a00; \233(state)->u.wide[ 1] = a10; \234(state)->u.wide[ 2] = a20; \235(state)->u.wide[ 3] = a30; \236(state)->u.wide[ 4] = a40; \237(state)->u.wide[ 5] = a01; \238(state)->u.wide[ 6] = a11; 
\239(state)->u.wide[ 7] = a21; \240(state)->u.wide[ 8] = a31; \241(state)->u.wide[ 9] = a41; \242(state)->u.wide[10] = a02; \243(state)->u.wide[11] = a12; \244(state)->u.wide[12] = a22; \245(state)->u.wide[13] = a32; \246(state)->u.wide[14] = a42; \247(state)->u.wide[15] = a03; \248(state)->u.wide[16] = a13; \249(state)->u.wide[17] = a23; \250(state)->u.wide[18] = a33; \251(state)->u.wide[19] = a43; \252(state)->u.wide[20] = a04; \253(state)->u.wide[21] = a14; \254(state)->u.wide[22] = a24; \255(state)->u.wide[23] = a34; \256(state)->u.wide[24] = a44; \257} while (0)258259#define INPUT_BUF144 do { \260a00 ^= sph_dec64le_aligned(buf + 0); \261a10 ^= sph_dec64le_aligned(buf + 8); \262a20 ^= sph_dec64le_aligned(buf + 16); \263a30 ^= sph_dec64le_aligned(buf + 24); \264a40 ^= sph_dec64le_aligned(buf + 32); \265a01 ^= sph_dec64le_aligned(buf + 40); \266a11 ^= sph_dec64le_aligned(buf + 48); \267a21 ^= sph_dec64le_aligned(buf + 56); \268a31 ^= sph_dec64le_aligned(buf + 64); \269a41 ^= sph_dec64le_aligned(buf + 72); \270a02 ^= sph_dec64le_aligned(buf + 80); \271a12 ^= sph_dec64le_aligned(buf + 88); \272a22 ^= sph_dec64le_aligned(buf + 96); \273a32 ^= sph_dec64le_aligned(buf + 104); \274a42 ^= sph_dec64le_aligned(buf + 112); \275a03 ^= sph_dec64le_aligned(buf + 120); \276a13 ^= sph_dec64le_aligned(buf + 128); \277a23 ^= sph_dec64le_aligned(buf + 136); \278} while (0)279280#define INPUT_BUF136 do { \281a00 ^= sph_dec64le_aligned(buf + 0); \282a10 ^= sph_dec64le_aligned(buf + 8); \283a20 ^= sph_dec64le_aligned(buf + 16); \284a30 ^= sph_dec64le_aligned(buf + 24); \285a40 ^= sph_dec64le_aligned(buf + 32); \286a01 ^= sph_dec64le_aligned(buf + 40); \287a11 ^= sph_dec64le_aligned(buf + 48); \288a21 ^= sph_dec64le_aligned(buf + 56); \289a31 ^= sph_dec64le_aligned(buf + 64); \290a41 ^= sph_dec64le_aligned(buf + 72); \291a02 ^= sph_dec64le_aligned(buf + 80); \292a12 ^= sph_dec64le_aligned(buf + 88); \293a22 ^= sph_dec64le_aligned(buf + 96); \294a32 ^= sph_dec64le_aligned(buf + 104); 
\295a42 ^= sph_dec64le_aligned(buf + 112); \296a03 ^= sph_dec64le_aligned(buf + 120); \297a13 ^= sph_dec64le_aligned(buf + 128); \298} while (0)299300#define INPUT_BUF104 do { \301a00 ^= sph_dec64le_aligned(buf + 0); \302a10 ^= sph_dec64le_aligned(buf + 8); \303a20 ^= sph_dec64le_aligned(buf + 16); \304a30 ^= sph_dec64le_aligned(buf + 24); \305a40 ^= sph_dec64le_aligned(buf + 32); \306a01 ^= sph_dec64le_aligned(buf + 40); \307a11 ^= sph_dec64le_aligned(buf + 48); \308a21 ^= sph_dec64le_aligned(buf + 56); \309a31 ^= sph_dec64le_aligned(buf + 64); \310a41 ^= sph_dec64le_aligned(buf + 72); \311a02 ^= sph_dec64le_aligned(buf + 80); \312a12 ^= sph_dec64le_aligned(buf + 88); \313a22 ^= sph_dec64le_aligned(buf + 96); \314} while (0)315316#define INPUT_BUF72 do { \317a00 ^= sph_dec64le_aligned(buf + 0); \318a10 ^= sph_dec64le_aligned(buf + 8); \319a20 ^= sph_dec64le_aligned(buf + 16); \320a30 ^= sph_dec64le_aligned(buf + 24); \321a40 ^= sph_dec64le_aligned(buf + 32); \322a01 ^= sph_dec64le_aligned(buf + 40); \323a11 ^= sph_dec64le_aligned(buf + 48); \324a21 ^= sph_dec64le_aligned(buf + 56); \325a31 ^= sph_dec64le_aligned(buf + 64); \326} while (0)327328#define INPUT_BUF(lim) do { \329a00 ^= sph_dec64le_aligned(buf + 0); \330a10 ^= sph_dec64le_aligned(buf + 8); \331a20 ^= sph_dec64le_aligned(buf + 16); \332a30 ^= sph_dec64le_aligned(buf + 24); \333a40 ^= sph_dec64le_aligned(buf + 32); \334a01 ^= sph_dec64le_aligned(buf + 40); \335a11 ^= sph_dec64le_aligned(buf + 48); \336a21 ^= sph_dec64le_aligned(buf + 56); \337a31 ^= sph_dec64le_aligned(buf + 64); \338if ((lim) == 72) \339break; \340a41 ^= sph_dec64le_aligned(buf + 72); \341a02 ^= sph_dec64le_aligned(buf + 80); \342a12 ^= sph_dec64le_aligned(buf + 88); \343a22 ^= sph_dec64le_aligned(buf + 96); \344if ((lim) == 104) \345break; \346a32 ^= sph_dec64le_aligned(buf + 104); \347a42 ^= sph_dec64le_aligned(buf + 112); \348a03 ^= sph_dec64le_aligned(buf + 120); \349a13 ^= sph_dec64le_aligned(buf + 128); \350if ((lim) == 136) 
\351break; \352a23 ^= sph_dec64le_aligned(buf + 136); \353} while (0)354355#endif356357#define DECL64(x) sph_u64 x358#define MOV64(d, s) (d = s)359#define XOR64(d, a, b) (d = a ^ b)360#define AND64(d, a, b) (d = a & b)361#define OR64(d, a, b) (d = a | b)362#define NOT64(d, s) (d = SPH_T64(~s))363#define ROL64(d, v, n) (d = SPH_ROTL64(v, n))364#define XOR64_IOTA XOR64365366#else367368static const struct {369sph_u32 high, low;370} RC[] = {371#if SPH_KECCAK_INTERLEAVE372{ SPH_C32(0x00000000), SPH_C32(0x00000001) },373{ SPH_C32(0x00000089), SPH_C32(0x00000000) },374{ SPH_C32(0x8000008B), SPH_C32(0x00000000) },375{ SPH_C32(0x80008080), SPH_C32(0x00000000) },376{ SPH_C32(0x0000008B), SPH_C32(0x00000001) },377{ SPH_C32(0x00008000), SPH_C32(0x00000001) },378{ SPH_C32(0x80008088), SPH_C32(0x00000001) },379{ SPH_C32(0x80000082), SPH_C32(0x00000001) },380{ SPH_C32(0x0000000B), SPH_C32(0x00000000) },381{ SPH_C32(0x0000000A), SPH_C32(0x00000000) },382{ SPH_C32(0x00008082), SPH_C32(0x00000001) },383{ SPH_C32(0x00008003), SPH_C32(0x00000000) },384{ SPH_C32(0x0000808B), SPH_C32(0x00000001) },385{ SPH_C32(0x8000000B), SPH_C32(0x00000001) },386{ SPH_C32(0x8000008A), SPH_C32(0x00000001) },387{ SPH_C32(0x80000081), SPH_C32(0x00000001) },388{ SPH_C32(0x80000081), SPH_C32(0x00000000) },389{ SPH_C32(0x80000008), SPH_C32(0x00000000) },390{ SPH_C32(0x00000083), SPH_C32(0x00000000) },391{ SPH_C32(0x80008003), SPH_C32(0x00000000) },392{ SPH_C32(0x80008088), SPH_C32(0x00000001) },393{ SPH_C32(0x80000088), SPH_C32(0x00000000) },394{ SPH_C32(0x00008000), SPH_C32(0x00000001) },395{ SPH_C32(0x80008082), SPH_C32(0x00000000) }396#else397{ SPH_C32(0x00000000), SPH_C32(0x00000001) },398{ SPH_C32(0x00000000), SPH_C32(0x00008082) },399{ SPH_C32(0x80000000), SPH_C32(0x0000808A) },400{ SPH_C32(0x80000000), SPH_C32(0x80008000) },401{ SPH_C32(0x00000000), SPH_C32(0x0000808B) },402{ SPH_C32(0x00000000), SPH_C32(0x80000001) },403{ SPH_C32(0x80000000), SPH_C32(0x80008081) },404{ SPH_C32(0x80000000), 
SPH_C32(0x00008009) },405{ SPH_C32(0x00000000), SPH_C32(0x0000008A) },406{ SPH_C32(0x00000000), SPH_C32(0x00000088) },407{ SPH_C32(0x00000000), SPH_C32(0x80008009) },408{ SPH_C32(0x00000000), SPH_C32(0x8000000A) },409{ SPH_C32(0x00000000), SPH_C32(0x8000808B) },410{ SPH_C32(0x80000000), SPH_C32(0x0000008B) },411{ SPH_C32(0x80000000), SPH_C32(0x00008089) },412{ SPH_C32(0x80000000), SPH_C32(0x00008003) },413{ SPH_C32(0x80000000), SPH_C32(0x00008002) },414{ SPH_C32(0x80000000), SPH_C32(0x00000080) },415{ SPH_C32(0x00000000), SPH_C32(0x0000800A) },416{ SPH_C32(0x80000000), SPH_C32(0x8000000A) },417{ SPH_C32(0x80000000), SPH_C32(0x80008081) },418{ SPH_C32(0x80000000), SPH_C32(0x00008080) },419{ SPH_C32(0x00000000), SPH_C32(0x80000001) },420{ SPH_C32(0x80000000), SPH_C32(0x80008008) }421#endif422};423424#if SPH_KECCAK_INTERLEAVE425426#define INTERLEAVE(xl, xh) do { \427sph_u32 l, h, t; \428l = (xl); h = (xh); \429t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \430t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \431t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \432t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \433t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \434t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \435t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \436t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \437t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \438l ^= t; h ^= t >> 16; \439(xl) = l; (xh) = h; \440} while (0)441442#define UNINTERLEAVE(xl, xh) do { \443sph_u32 l, h, t; \444l = (xl); h = (xh); \445t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \446l ^= t; h ^= t >> 16; \447t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \448t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \449t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \450t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \451t = (l ^ (l >> 
2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \452t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \453t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \454t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \455(xl) = l; (xh) = h; \456} while (0)457458#else459460#define INTERLEAVE(l, h)461#define UNINTERLEAVE(l, h)462463#endif464465#if SPH_KECCAK_NOCOPY466467#define a00l (kc->u.narrow[2 * 0 + 0])468#define a00h (kc->u.narrow[2 * 0 + 1])469#define a10l (kc->u.narrow[2 * 1 + 0])470#define a10h (kc->u.narrow[2 * 1 + 1])471#define a20l (kc->u.narrow[2 * 2 + 0])472#define a20h (kc->u.narrow[2 * 2 + 1])473#define a30l (kc->u.narrow[2 * 3 + 0])474#define a30h (kc->u.narrow[2 * 3 + 1])475#define a40l (kc->u.narrow[2 * 4 + 0])476#define a40h (kc->u.narrow[2 * 4 + 1])477#define a01l (kc->u.narrow[2 * 5 + 0])478#define a01h (kc->u.narrow[2 * 5 + 1])479#define a11l (kc->u.narrow[2 * 6 + 0])480#define a11h (kc->u.narrow[2 * 6 + 1])481#define a21l (kc->u.narrow[2 * 7 + 0])482#define a21h (kc->u.narrow[2 * 7 + 1])483#define a31l (kc->u.narrow[2 * 8 + 0])484#define a31h (kc->u.narrow[2 * 8 + 1])485#define a41l (kc->u.narrow[2 * 9 + 0])486#define a41h (kc->u.narrow[2 * 9 + 1])487#define a02l (kc->u.narrow[2 * 10 + 0])488#define a02h (kc->u.narrow[2 * 10 + 1])489#define a12l (kc->u.narrow[2 * 11 + 0])490#define a12h (kc->u.narrow[2 * 11 + 1])491#define a22l (kc->u.narrow[2 * 12 + 0])492#define a22h (kc->u.narrow[2 * 12 + 1])493#define a32l (kc->u.narrow[2 * 13 + 0])494#define a32h (kc->u.narrow[2 * 13 + 1])495#define a42l (kc->u.narrow[2 * 14 + 0])496#define a42h (kc->u.narrow[2 * 14 + 1])497#define a03l (kc->u.narrow[2 * 15 + 0])498#define a03h (kc->u.narrow[2 * 15 + 1])499#define a13l (kc->u.narrow[2 * 16 + 0])500#define a13h (kc->u.narrow[2 * 16 + 1])501#define a23l (kc->u.narrow[2 * 17 + 0])502#define a23h (kc->u.narrow[2 * 17 + 1])503#define a33l (kc->u.narrow[2 * 18 + 0])504#define a33h (kc->u.narrow[2 * 18 + 1])505#define a43l (kc->u.narrow[2 
* 19 + 0])506#define a43h (kc->u.narrow[2 * 19 + 1])507#define a04l (kc->u.narrow[2 * 20 + 0])508#define a04h (kc->u.narrow[2 * 20 + 1])509#define a14l (kc->u.narrow[2 * 21 + 0])510#define a14h (kc->u.narrow[2 * 21 + 1])511#define a24l (kc->u.narrow[2 * 22 + 0])512#define a24h (kc->u.narrow[2 * 22 + 1])513#define a34l (kc->u.narrow[2 * 23 + 0])514#define a34h (kc->u.narrow[2 * 23 + 1])515#define a44l (kc->u.narrow[2 * 24 + 0])516#define a44h (kc->u.narrow[2 * 24 + 1])517518#define DECL_STATE519#define READ_STATE(state)520#define WRITE_STATE(state)521522#define INPUT_BUF(size) do { \523size_t j; \524for (j = 0; j < (size); j += 8) { \525sph_u32 tl, th; \526tl = sph_dec32le_aligned(buf + j + 0); \527th = sph_dec32le_aligned(buf + j + 4); \528INTERLEAVE(tl, th); \529kc->u.narrow[(j >> 2) + 0] ^= tl; \530kc->u.narrow[(j >> 2) + 1] ^= th; \531} \532} while (0)533534#define INPUT_BUF144 INPUT_BUF(144)535#define INPUT_BUF136 INPUT_BUF(136)536#define INPUT_BUF104 INPUT_BUF(104)537#define INPUT_BUF72 INPUT_BUF(72)538539#else540541#define DECL_STATE \542sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \543sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \544sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \545sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \546sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;547548#define READ_STATE(state) do { \549a00l = (state)->u.narrow[2 * 0 + 0]; \550a00h = (state)->u.narrow[2 * 0 + 1]; \551a10l = (state)->u.narrow[2 * 1 + 0]; \552a10h = (state)->u.narrow[2 * 1 + 1]; \553a20l = (state)->u.narrow[2 * 2 + 0]; \554a20h = (state)->u.narrow[2 * 2 + 1]; \555a30l = (state)->u.narrow[2 * 3 + 0]; \556a30h = (state)->u.narrow[2 * 3 + 1]; \557a40l = (state)->u.narrow[2 * 4 + 0]; \558a40h = (state)->u.narrow[2 * 4 + 1]; \559a01l = (state)->u.narrow[2 * 5 + 0]; \560a01h = (state)->u.narrow[2 * 5 + 1]; \561a11l = (state)->u.narrow[2 * 6 + 0]; \562a11h = 
(state)->u.narrow[2 * 6 + 1]; \563a21l = (state)->u.narrow[2 * 7 + 0]; \564a21h = (state)->u.narrow[2 * 7 + 1]; \565a31l = (state)->u.narrow[2 * 8 + 0]; \566a31h = (state)->u.narrow[2 * 8 + 1]; \567a41l = (state)->u.narrow[2 * 9 + 0]; \568a41h = (state)->u.narrow[2 * 9 + 1]; \569a02l = (state)->u.narrow[2 * 10 + 0]; \570a02h = (state)->u.narrow[2 * 10 + 1]; \571a12l = (state)->u.narrow[2 * 11 + 0]; \572a12h = (state)->u.narrow[2 * 11 + 1]; \573a22l = (state)->u.narrow[2 * 12 + 0]; \574a22h = (state)->u.narrow[2 * 12 + 1]; \575a32l = (state)->u.narrow[2 * 13 + 0]; \576a32h = (state)->u.narrow[2 * 13 + 1]; \577a42l = (state)->u.narrow[2 * 14 + 0]; \578a42h = (state)->u.narrow[2 * 14 + 1]; \579a03l = (state)->u.narrow[2 * 15 + 0]; \580a03h = (state)->u.narrow[2 * 15 + 1]; \581a13l = (state)->u.narrow[2 * 16 + 0]; \582a13h = (state)->u.narrow[2 * 16 + 1]; \583a23l = (state)->u.narrow[2 * 17 + 0]; \584a23h = (state)->u.narrow[2 * 17 + 1]; \585a33l = (state)->u.narrow[2 * 18 + 0]; \586a33h = (state)->u.narrow[2 * 18 + 1]; \587a43l = (state)->u.narrow[2 * 19 + 0]; \588a43h = (state)->u.narrow[2 * 19 + 1]; \589a04l = (state)->u.narrow[2 * 20 + 0]; \590a04h = (state)->u.narrow[2 * 20 + 1]; \591a14l = (state)->u.narrow[2 * 21 + 0]; \592a14h = (state)->u.narrow[2 * 21 + 1]; \593a24l = (state)->u.narrow[2 * 22 + 0]; \594a24h = (state)->u.narrow[2 * 22 + 1]; \595a34l = (state)->u.narrow[2 * 23 + 0]; \596a34h = (state)->u.narrow[2 * 23 + 1]; \597a44l = (state)->u.narrow[2 * 24 + 0]; \598a44h = (state)->u.narrow[2 * 24 + 1]; \599} while (0)600601#define WRITE_STATE(state) do { \602(state)->u.narrow[2 * 0 + 0] = a00l; \603(state)->u.narrow[2 * 0 + 1] = a00h; \604(state)->u.narrow[2 * 1 + 0] = a10l; \605(state)->u.narrow[2 * 1 + 1] = a10h; \606(state)->u.narrow[2 * 2 + 0] = a20l; \607(state)->u.narrow[2 * 2 + 1] = a20h; \608(state)->u.narrow[2 * 3 + 0] = a30l; \609(state)->u.narrow[2 * 3 + 1] = a30h; \610(state)->u.narrow[2 * 4 + 0] = a40l; \611(state)->u.narrow[2 * 4 + 1] = a40h; 
\612(state)->u.narrow[2 * 5 + 0] = a01l; \613(state)->u.narrow[2 * 5 + 1] = a01h; \614(state)->u.narrow[2 * 6 + 0] = a11l; \615(state)->u.narrow[2 * 6 + 1] = a11h; \616(state)->u.narrow[2 * 7 + 0] = a21l; \617(state)->u.narrow[2 * 7 + 1] = a21h; \618(state)->u.narrow[2 * 8 + 0] = a31l; \619(state)->u.narrow[2 * 8 + 1] = a31h; \620(state)->u.narrow[2 * 9 + 0] = a41l; \621(state)->u.narrow[2 * 9 + 1] = a41h; \622(state)->u.narrow[2 * 10 + 0] = a02l; \623(state)->u.narrow[2 * 10 + 1] = a02h; \624(state)->u.narrow[2 * 11 + 0] = a12l; \625(state)->u.narrow[2 * 11 + 1] = a12h; \626(state)->u.narrow[2 * 12 + 0] = a22l; \627(state)->u.narrow[2 * 12 + 1] = a22h; \628(state)->u.narrow[2 * 13 + 0] = a32l; \629(state)->u.narrow[2 * 13 + 1] = a32h; \630(state)->u.narrow[2 * 14 + 0] = a42l; \631(state)->u.narrow[2 * 14 + 1] = a42h; \632(state)->u.narrow[2 * 15 + 0] = a03l; \633(state)->u.narrow[2 * 15 + 1] = a03h; \634(state)->u.narrow[2 * 16 + 0] = a13l; \635(state)->u.narrow[2 * 16 + 1] = a13h; \636(state)->u.narrow[2 * 17 + 0] = a23l; \637(state)->u.narrow[2 * 17 + 1] = a23h; \638(state)->u.narrow[2 * 18 + 0] = a33l; \639(state)->u.narrow[2 * 18 + 1] = a33h; \640(state)->u.narrow[2 * 19 + 0] = a43l; \641(state)->u.narrow[2 * 19 + 1] = a43h; \642(state)->u.narrow[2 * 20 + 0] = a04l; \643(state)->u.narrow[2 * 20 + 1] = a04h; \644(state)->u.narrow[2 * 21 + 0] = a14l; \645(state)->u.narrow[2 * 21 + 1] = a14h; \646(state)->u.narrow[2 * 22 + 0] = a24l; \647(state)->u.narrow[2 * 22 + 1] = a24h; \648(state)->u.narrow[2 * 23 + 0] = a34l; \649(state)->u.narrow[2 * 23 + 1] = a34h; \650(state)->u.narrow[2 * 24 + 0] = a44l; \651(state)->u.narrow[2 * 24 + 1] = a44h; \652} while (0)653654#define READ64(d, off) do { \655sph_u32 tl, th; \656tl = sph_dec32le_aligned(buf + (off)); \657th = sph_dec32le_aligned(buf + (off) + 4); \658INTERLEAVE(tl, th); \659d ## l ^= tl; \660d ## h ^= th; \661} while (0)662663#define INPUT_BUF144 do { \664READ64(a00, 0); \665READ64(a10, 8); \666READ64(a20, 16); 
\667READ64(a30, 24); \668READ64(a40, 32); \669READ64(a01, 40); \670READ64(a11, 48); \671READ64(a21, 56); \672READ64(a31, 64); \673READ64(a41, 72); \674READ64(a02, 80); \675READ64(a12, 88); \676READ64(a22, 96); \677READ64(a32, 104); \678READ64(a42, 112); \679READ64(a03, 120); \680READ64(a13, 128); \681READ64(a23, 136); \682} while (0)683684#define INPUT_BUF136 do { \685READ64(a00, 0); \686READ64(a10, 8); \687READ64(a20, 16); \688READ64(a30, 24); \689READ64(a40, 32); \690READ64(a01, 40); \691READ64(a11, 48); \692READ64(a21, 56); \693READ64(a31, 64); \694READ64(a41, 72); \695READ64(a02, 80); \696READ64(a12, 88); \697READ64(a22, 96); \698READ64(a32, 104); \699READ64(a42, 112); \700READ64(a03, 120); \701READ64(a13, 128); \702} while (0)703704#define INPUT_BUF104 do { \705READ64(a00, 0); \706READ64(a10, 8); \707READ64(a20, 16); \708READ64(a30, 24); \709READ64(a40, 32); \710READ64(a01, 40); \711READ64(a11, 48); \712READ64(a21, 56); \713READ64(a31, 64); \714READ64(a41, 72); \715READ64(a02, 80); \716READ64(a12, 88); \717READ64(a22, 96); \718} while (0)719720#define INPUT_BUF72 do { \721READ64(a00, 0); \722READ64(a10, 8); \723READ64(a20, 16); \724READ64(a30, 24); \725READ64(a40, 32); \726READ64(a01, 40); \727READ64(a11, 48); \728READ64(a21, 56); \729READ64(a31, 64); \730} while (0)731732#define INPUT_BUF(lim) do { \733READ64(a00, 0); \734READ64(a10, 8); \735READ64(a20, 16); \736READ64(a30, 24); \737READ64(a40, 32); \738READ64(a01, 40); \739READ64(a11, 48); \740READ64(a21, 56); \741READ64(a31, 64); \742if ((lim) == 72) \743break; \744READ64(a41, 72); \745READ64(a02, 80); \746READ64(a12, 88); \747READ64(a22, 96); \748if ((lim) == 104) \749break; \750READ64(a32, 104); \751READ64(a42, 112); \752READ64(a03, 120); \753READ64(a13, 128); \754if ((lim) == 136) \755break; \756READ64(a23, 136); \757} while (0)758759#endif760761#define DECL64(x) sph_u64 x ## l, x ## h762#define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h)763#define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a 
## h ^ b ## h)764#define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)765#define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)766#define NOT64(d, s) (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h))767#define ROL64(d, v, n) ROL64_ ## n(d, v)768769#if SPH_KECCAK_INTERLEAVE770771#define ROL64_odd1(d, v) do { \772sph_u32 tmp; \773tmp = v ## l; \774d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \775d ## h = tmp; \776} while (0)777778#define ROL64_odd63(d, v) do { \779sph_u32 tmp; \780tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); \781d ## l = v ## h; \782d ## h = tmp; \783} while (0)784785#define ROL64_odd(d, v, n) do { \786sph_u32 tmp; \787tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \788d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \789d ## h = tmp; \790} while (0)791792#define ROL64_even(d, v, n) do { \793d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \794d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \795} while (0)796797#define ROL64_0(d, v)798#define ROL64_1(d, v) ROL64_odd1(d, v)799#define ROL64_2(d, v) ROL64_even(d, v, 1)800#define ROL64_3(d, v) ROL64_odd( d, v, 2)801#define ROL64_4(d, v) ROL64_even(d, v, 2)802#define ROL64_5(d, v) ROL64_odd( d, v, 3)803#define ROL64_6(d, v) ROL64_even(d, v, 3)804#define ROL64_7(d, v) ROL64_odd( d, v, 4)805#define ROL64_8(d, v) ROL64_even(d, v, 4)806#define ROL64_9(d, v) ROL64_odd( d, v, 5)807#define ROL64_10(d, v) ROL64_even(d, v, 5)808#define ROL64_11(d, v) ROL64_odd( d, v, 6)809#define ROL64_12(d, v) ROL64_even(d, v, 6)810#define ROL64_13(d, v) ROL64_odd( d, v, 7)811#define ROL64_14(d, v) ROL64_even(d, v, 7)812#define ROL64_15(d, v) ROL64_odd( d, v, 8)813#define ROL64_16(d, v) ROL64_even(d, v, 8)814#define ROL64_17(d, v) ROL64_odd( d, v, 9)815#define ROL64_18(d, v) ROL64_even(d, v, 9)816#define ROL64_19(d, v) ROL64_odd( d, v, 10)817#define ROL64_20(d, v) ROL64_even(d, v, 10)818#define ROL64_21(d, v) ROL64_odd( d, v, 11)819#define ROL64_22(d, v) 
ROL64_even(d, v, 11)820#define ROL64_23(d, v) ROL64_odd( d, v, 12)821#define ROL64_24(d, v) ROL64_even(d, v, 12)822#define ROL64_25(d, v) ROL64_odd( d, v, 13)823#define ROL64_26(d, v) ROL64_even(d, v, 13)824#define ROL64_27(d, v) ROL64_odd( d, v, 14)825#define ROL64_28(d, v) ROL64_even(d, v, 14)826#define ROL64_29(d, v) ROL64_odd( d, v, 15)827#define ROL64_30(d, v) ROL64_even(d, v, 15)828#define ROL64_31(d, v) ROL64_odd( d, v, 16)829#define ROL64_32(d, v) ROL64_even(d, v, 16)830#define ROL64_33(d, v) ROL64_odd( d, v, 17)831#define ROL64_34(d, v) ROL64_even(d, v, 17)832#define ROL64_35(d, v) ROL64_odd( d, v, 18)833#define ROL64_36(d, v) ROL64_even(d, v, 18)834#define ROL64_37(d, v) ROL64_odd( d, v, 19)835#define ROL64_38(d, v) ROL64_even(d, v, 19)836#define ROL64_39(d, v) ROL64_odd( d, v, 20)837#define ROL64_40(d, v) ROL64_even(d, v, 20)838#define ROL64_41(d, v) ROL64_odd( d, v, 21)839#define ROL64_42(d, v) ROL64_even(d, v, 21)840#define ROL64_43(d, v) ROL64_odd( d, v, 22)841#define ROL64_44(d, v) ROL64_even(d, v, 22)842#define ROL64_45(d, v) ROL64_odd( d, v, 23)843#define ROL64_46(d, v) ROL64_even(d, v, 23)844#define ROL64_47(d, v) ROL64_odd( d, v, 24)845#define ROL64_48(d, v) ROL64_even(d, v, 24)846#define ROL64_49(d, v) ROL64_odd( d, v, 25)847#define ROL64_50(d, v) ROL64_even(d, v, 25)848#define ROL64_51(d, v) ROL64_odd( d, v, 26)849#define ROL64_52(d, v) ROL64_even(d, v, 26)850#define ROL64_53(d, v) ROL64_odd( d, v, 27)851#define ROL64_54(d, v) ROL64_even(d, v, 27)852#define ROL64_55(d, v) ROL64_odd( d, v, 28)853#define ROL64_56(d, v) ROL64_even(d, v, 28)854#define ROL64_57(d, v) ROL64_odd( d, v, 29)855#define ROL64_58(d, v) ROL64_even(d, v, 29)856#define ROL64_59(d, v) ROL64_odd( d, v, 30)857#define ROL64_60(d, v) ROL64_even(d, v, 30)858#define ROL64_61(d, v) ROL64_odd( d, v, 31)859#define ROL64_62(d, v) ROL64_even(d, v, 31)860#define ROL64_63(d, v) ROL64_odd63(d, v)861862#else863864#define ROL64_small(d, v, n) do { \865sph_u32 tmp; \866tmp = SPH_T32(v ## l << 
n) | (v ## h >> (32 - n)); \
		d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \
		d ## l = tmp; \
	} while (0)

/*
 * ROL64_n(d, v): set d to v rotated left by n bits (0 <= n <= 63), for
 * the 32-bit state representation where each 64-bit lane is a pair of
 * 32-bit words ('l' = low half, 'h' = high half). Rotations by less
 * than 32 use ROL64_small (defined just above); a rotation by exactly
 * 32 is a swap of the two halves; rotations by 33..63 go through
 * ROL64_big (small rotation followed by a half swap).
 *
 * ROL64_0 is not expanded anywhere in the visible code (the rotation
 * by 0 in RHO is commented out).
 */
#define ROL64_0(d, v) 0
#define ROL64_1(d, v) ROL64_small(d, v, 1)
#define ROL64_2(d, v) ROL64_small(d, v, 2)
#define ROL64_3(d, v) ROL64_small(d, v, 3)
#define ROL64_4(d, v) ROL64_small(d, v, 4)
#define ROL64_5(d, v) ROL64_small(d, v, 5)
#define ROL64_6(d, v) ROL64_small(d, v, 6)
#define ROL64_7(d, v) ROL64_small(d, v, 7)
#define ROL64_8(d, v) ROL64_small(d, v, 8)
#define ROL64_9(d, v) ROL64_small(d, v, 9)
#define ROL64_10(d, v) ROL64_small(d, v, 10)
#define ROL64_11(d, v) ROL64_small(d, v, 11)
#define ROL64_12(d, v) ROL64_small(d, v, 12)
#define ROL64_13(d, v) ROL64_small(d, v, 13)
#define ROL64_14(d, v) ROL64_small(d, v, 14)
#define ROL64_15(d, v) ROL64_small(d, v, 15)
#define ROL64_16(d, v) ROL64_small(d, v, 16)
#define ROL64_17(d, v) ROL64_small(d, v, 17)
#define ROL64_18(d, v) ROL64_small(d, v, 18)
#define ROL64_19(d, v) ROL64_small(d, v, 19)
#define ROL64_20(d, v) ROL64_small(d, v, 20)
#define ROL64_21(d, v) ROL64_small(d, v, 21)
#define ROL64_22(d, v) ROL64_small(d, v, 22)
#define ROL64_23(d, v) ROL64_small(d, v, 23)
#define ROL64_24(d, v) ROL64_small(d, v, 24)
#define ROL64_25(d, v) ROL64_small(d, v, 25)
#define ROL64_26(d, v) ROL64_small(d, v, 26)
#define ROL64_27(d, v) ROL64_small(d, v, 27)
#define ROL64_28(d, v) ROL64_small(d, v, 28)
#define ROL64_29(d, v) ROL64_small(d, v, 29)
#define ROL64_30(d, v) ROL64_small(d, v, 30)
#define ROL64_31(d, v) ROL64_small(d, v, 31)

/* Rotation by exactly 32 bits: swap the two 32-bit halves of the lane. */
#define ROL64_32(d, v) do { \
		sph_u32 tmp; \
		tmp = v ## l; \
		d ## l = v ## h; \
		d ## h = tmp; \
	} while (0)

/*
 * Rotation by 32+n bits (1 <= n <= 31): rotate by n into a temporary
 * lane 'tr', then assign it with the halves exchanged (the extra 32-bit
 * rotation). ROL64_small declares no locals, so 'trl'/'trh' are safe.
 */
#define ROL64_big(d, v, n) do { \
		sph_u32 trl, trh; \
		ROL64_small(tr, v, n); \
		d ## h = trl; \
		d ## l = trh; \
	} while (0)

#define ROL64_33(d, v) ROL64_big(d, v, 1)
#define ROL64_34(d, v) ROL64_big(d, v, 2)
#define ROL64_35(d, v) ROL64_big(d, v, 3)
#define ROL64_36(d, v) ROL64_big(d, v, 4)
#define ROL64_37(d, v) ROL64_big(d, v, 5)
#define ROL64_38(d, v) ROL64_big(d, v, 6)
#define ROL64_39(d, v) ROL64_big(d, v, 7)
#define ROL64_40(d, v) ROL64_big(d, v, 8)
#define ROL64_41(d, v) ROL64_big(d, v, 9)
#define ROL64_42(d, v) ROL64_big(d, v, 10)
#define ROL64_43(d, v) ROL64_big(d, v, 11)
#define ROL64_44(d, v) ROL64_big(d, v, 12)
#define ROL64_45(d, v) ROL64_big(d, v, 13)
#define ROL64_46(d, v) ROL64_big(d, v, 14)
#define ROL64_47(d, v) ROL64_big(d, v, 15)
#define ROL64_48(d, v) ROL64_big(d, v, 16)
#define ROL64_49(d, v) ROL64_big(d, v, 17)
#define ROL64_50(d, v) ROL64_big(d, v, 18)
#define ROL64_51(d, v) ROL64_big(d, v, 19)
#define ROL64_52(d, v) ROL64_big(d, v, 20)
#define ROL64_53(d, v) ROL64_big(d, v, 21)
#define ROL64_54(d, v) ROL64_big(d, v, 22)
#define ROL64_55(d, v) ROL64_big(d, v, 23)
#define ROL64_56(d, v) ROL64_big(d, v, 24)
#define ROL64_57(d, v) ROL64_big(d, v, 25)
#define ROL64_58(d, v) ROL64_big(d, v, 26)
#define ROL64_59(d, v) ROL64_big(d, v, 27)
#define ROL64_60(d, v) ROL64_big(d, v, 28)
#define ROL64_61(d, v) ROL64_big(d, v, 29)
#define ROL64_62(d, v) ROL64_big(d, v, 30)
#define ROL64_63(d, v) ROL64_big(d, v, 31)

#endif /* closes a rotation-variant #if opened above this excerpt */

/*
 * d <- s XOR round constant k, where k is a structure with 'low' and
 * 'high' 32-bit members (the 32-bit split of a 64-bit round constant).
 */
#define XOR64_IOTA(d, s, k) \
	(d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high)

#endif /* closes the 64-bit / 32-bit implementation selection opened above */

/*
 * One element of the "theta" step: from the visible operations,
 *   t = (c0 ^ c1 ^ c2 ^ c3 ^ c4) ^ ROL64(d0 ^ d1 ^ d2 ^ d3 ^ d4, 1)
 * i.e. the parity of one column XORed with the rotated parity of
 * another column. tt0..tt3 are scratch lanes.
 */
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
		DECL64(tt0); \
		DECL64(tt1); \
		DECL64(tt2); \
		DECL64(tt3); \
		XOR64(tt0, d0, d1); \
		XOR64(tt1, d2, d3); \
		XOR64(tt0, tt0, d4); \
		XOR64(tt0, tt0, tt1); \
		ROL64(tt0, tt0, 1); \
		XOR64(tt2, c0, c1); \
		XOR64(tt3, c2, c3); \
		XOR64(tt0, tt0, c4); \
		XOR64(tt2, tt2, tt3); \
		XOR64(t, tt0, tt2); \
	} while (0)

/*
 * The "theta" step of the Keccak round: compute one TH_ELT term per
 * column (t0..t4), each combining the parity of the previous column
 * with the rotated parity of the next one, then XOR t_j into every
 * lane of column j.
 */
#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
	b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
	b40, b41, b42, b43, b44) \
	do { \
		DECL64(t0); \
		DECL64(t1); \
		DECL64(t2); \
		DECL64(t3); \
		DECL64(t4); \
		TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
		TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
		TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
		TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
		TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
		XOR64(b00, b00, t0); \
		XOR64(b01, b01, t0); \
		XOR64(b02, b02, t0); \
		XOR64(b03, b03, t0); \
		XOR64(b04, b04, t0); \
		XOR64(b10, b10, t1); \
		XOR64(b11, b11, t1); \
		XOR64(b12, b12, t1); \
		XOR64(b13, b13, t1); \
		XOR64(b14, b14, t1); \
		XOR64(b20, b20, t2); \
		XOR64(b21, b21, t2); \
		XOR64(b22, b22, t2); \
		XOR64(b23, b23, t2); \
		XOR64(b24, b24, t2); \
		XOR64(b30, b30, t3); \
		XOR64(b31, b31, t3); \
		XOR64(b32, b32, t3); \
		XOR64(b33, b33, t3); \
		XOR64(b34, b34, t3); \
		XOR64(b40, b40, t4); \
		XOR64(b41, b41, t4); \
		XOR64(b42, b42, t4); \
		XOR64(b43, b43, t4); \
		XOR64(b44, b44, t4); \
	} while (0)

/*
 * The "rho" step: rotate each lane in place by its fixed, per-lane
 * offset. Lane b00 has offset 0, hence the commented-out no-op.
 */
#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
	b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
	b40, b41, b42, b43, b44) \
	do { \
		/* ROL64(b00, b00, 0); */ \
		ROL64(b01, b01, 36); \
		ROL64(b02, b02, 3); \
		ROL64(b03, b03, 41); \
		ROL64(b04, b04, 18); \
		ROL64(b10, b10, 1); \
		ROL64(b11, b11, 44); \
		ROL64(b12, b12, 10); \
		ROL64(b13, b13, 45); \
		ROL64(b14, b14, 2); \
		ROL64(b20, b20, 62); \
		ROL64(b21, b21, 6); \
		ROL64(b22, b22, 43); \
		ROL64(b23, b23, 15); \
		ROL64(b24, b24, 61); \
		ROL64(b30, b30, 28); \
		ROL64(b31, b31, 55); \
		ROL64(b32, b32, 25); \
		ROL64(b33, b33, 21); \
		ROL64(b34, b34, 56); \
		ROL64(b40, b40, 27); \
		ROL64(b41, b41, 20); \
		ROL64(b42, b42, 39); \
		ROL64(b43, b43, 8); \
		ROL64(b44, b44, 14); \
	} while (0)

/*
 * The KHI macro integrates the "lane complement" optimization. On input,
 * some words are complemented:
 *    a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
 * On output, the following words are complemented:
 *    a04 a10 a20 a22 a23 a31
 *
 * The (implicit) permutation and the theta expansion will bring back
 * the input mask for the next round.
 */

/* d = a XOR (b OR c), using a scratch lane. */
#define KHI_XO(d, a, b, c) do { \
		DECL64(kt); \
		OR64(kt, b, c); \
		XOR64(d, a, kt); \
	} while (0)

/* d = a XOR (b AND c), using a scratch lane. */
#define KHI_XA(d, a, b, c) do { \
		DECL64(kt); \
		AND64(kt, b, c); \
		XOR64(d, a, kt); \
	} while (0)

/*
 * The "chi" step, one row at a time, with the OR/AND and complement
 * choices arranged per the lane-complement masks described above.
 * Each row is computed into c0..c4 before being written back, so the
 * five inputs of a row are read before any of them is modified.
 */
#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
	b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
	b40, b41, b42, b43, b44) \
	do { \
		DECL64(c0); \
		DECL64(c1); \
		DECL64(c2); \
		DECL64(c3); \
		DECL64(c4); \
		DECL64(bnn); \
		NOT64(bnn, b20); \
		KHI_XO(c0, b00, b10, b20); \
		KHI_XO(c1, b10, bnn, b30); \
		KHI_XA(c2, b20, b30, b40); \
		KHI_XO(c3, b30, b40, b00); \
		KHI_XA(c4, b40, b00, b10); \
		MOV64(b00, c0); \
		MOV64(b10, c1); \
		MOV64(b20, c2); \
		MOV64(b30, c3); \
		MOV64(b40, c4); \
		NOT64(bnn, b41); \
		KHI_XO(c0, b01, b11, b21); \
		KHI_XA(c1, b11, b21, b31); \
		KHI_XO(c2, b21, b31, bnn); \
		KHI_XO(c3, b31, b41, b01); \
		KHI_XA(c4, b41, b01, b11); \
		MOV64(b01, c0); \
		MOV64(b11, c1); \
		MOV64(b21, c2); \
		MOV64(b31, c3); \
		MOV64(b41, c4); \
		NOT64(bnn, b32); \
		KHI_XO(c0, b02, b12, b22); \
		KHI_XA(c1, b12, b22, b32); \
		KHI_XA(c2, b22, bnn, b42); \
		KHI_XO(c3, bnn, b42, b02); \
		KHI_XA(c4, b42, b02, b12); \
		MOV64(b02, c0); \
		MOV64(b12, c1); \
		MOV64(b22, c2); \
		MOV64(b32, c3); \
		MOV64(b42, c4); \
		NOT64(bnn, b33); \
		KHI_XA(c0, b03, b13, b23); \
		KHI_XO(c1, b13, b23, b33); \
		KHI_XO(c2, b23, bnn, b43); \
		KHI_XA(c3, bnn, b43, b03); \
		KHI_XO(c4, b43, b03, b13); \
		MOV64(b03, c0); \
		MOV64(b13, c1); \
		MOV64(b23, c2); \
		MOV64(b33, c3); \
		MOV64(b43, c4); \
		NOT64(bnn, b14); \
		KHI_XA(c0, b04, bnn, b24); \
		KHI_XO(c1, bnn, b24, b34); \
		KHI_XA(c2, b24, b34, b44); \
		KHI_XO(c3, b34, b44, b04); \
		KHI_XA(c4, b44, b04, b14); \
		MOV64(b04, c0); \
		MOV64(b14, c1); \
		MOV64(b24, c2); \
		MOV64(b34, c3); \
		MOV64(b44, c4); \
	} while (0)

/* The "iota" step: XOR round constant r into lane a00. */
#define IOTA(r) XOR64_IOTA(a00, a00, r)

/*
 * P0..P23: the 25 state variables listed in the lane order obtained
 * after 0..23 applications of the round permutation. Feeding P ## r to
 * THETA/RHO/KHI applies round r without physically moving lanes
 * between rounds; the ordering returns to P0 after 24 rounds (there is
 * no P24). Note a00 is a fixed point of the permutation.
 */
#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
	a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
	a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
	a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
	a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
	a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
	a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
	a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
	a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
	a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
	a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
	a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
	a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
	a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
	a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
	a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
	a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
	a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
	a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
	a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
	a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
	a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
	a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
	a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
	a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40

/*
 * Pk_TO_P0 macros: after k unrolled rounds the logical lane order is
 * P ## k; these physically move lane contents (following the cycles of
 * the k-th permutation power) so that the P0 naming is valid again at
 * the top of the unrolled loop.
 */
#define P1_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a30); \
		MOV64(a30, a33); \
		MOV64(a33, a23); \
		MOV64(a23, a12); \
		MOV64(a12, a21); \
		MOV64(a21, a02); \
		MOV64(a02, a10); \
		MOV64(a10, a11); \
		MOV64(a11, a41); \
		MOV64(a41, a24); \
		MOV64(a24, a42); \
		MOV64(a42, a04); \
		MOV64(a04, a20); \
		MOV64(a20, a22); \
		MOV64(a22, a32); \
		MOV64(a32, a43); \
		MOV64(a43, a34); \
		MOV64(a34, a03); \
		MOV64(a03, a40); \
		MOV64(a40, a44); \
		MOV64(a44, a14); \
		MOV64(a14, a31); \
		MOV64(a31, a13); \
		MOV64(a13, t); \
	} while (0)

#define P2_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a33); \
		MOV64(a33, a12); \
		MOV64(a12, a02); \
		MOV64(a02, a11); \
		MOV64(a11, a24); \
		MOV64(a24, a04); \
		MOV64(a04, a22); \
		MOV64(a22, a43); \
		MOV64(a43, a03); \
		MOV64(a03, a44); \
		MOV64(a44, a31); \
		MOV64(a31, t); \
		MOV64(t, a10); \
		MOV64(a10, a41); \
		MOV64(a41, a42); \
		MOV64(a42, a20); \
		MOV64(a20, a32); \
		MOV64(a32, a34); \
		MOV64(a34, a40); \
		MOV64(a40, a14); \
		MOV64(a14, a13); \
		MOV64(a13, a30); \
		MOV64(a30, a23); \
		MOV64(a23, a21); \
		MOV64(a21, t); \
	} while (0)

#define P4_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a12); \
		MOV64(a12, a11); \
		MOV64(a11, a04); \
		MOV64(a04, a43); \
		MOV64(a43, a44); \
		MOV64(a44, t); \
		MOV64(t, a02); \
		MOV64(a02, a24); \
		MOV64(a24, a22); \
		MOV64(a22, a03); \
		MOV64(a03, a31); \
		MOV64(a31, a33); \
		MOV64(a33, t); \
		MOV64(t, a10); \
		MOV64(a10, a42); \
		MOV64(a42, a32); \
		MOV64(a32, a40); \
		MOV64(a40, a13); \
		MOV64(a13, a23); \
		MOV64(a23, t); \
		MOV64(t, a14); \
		MOV64(a14, a30); \
		MOV64(a30, a21); \
		MOV64(a21, a41); \
		MOV64(a41, a20); \
		MOV64(a20, a34); \
		MOV64(a34, t); \
	} while (0)

#define P6_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a02); \
		MOV64(a02, a04); \
		MOV64(a04, a03); \
		MOV64(a03, t); \
		MOV64(t, a10); \
		MOV64(a10, a20); \
		MOV64(a20, a40); \
		MOV64(a40, a30); \
		MOV64(a30, t); \
		MOV64(t, a11); \
		MOV64(a11, a22); \
		MOV64(a22, a44); \
		MOV64(a44, a33); \
		MOV64(a33, t); \
		MOV64(t, a12); \
		MOV64(a12, a24); \
		MOV64(a24, a43); \
		MOV64(a43, a31); \
		MOV64(a31, t); \
		MOV64(t, a13); \
		MOV64(a13, a21); \
		MOV64(a21, a42); \
		MOV64(a42, a34); \
		MOV64(a34, t); \
		MOV64(t, a14); \
		MOV64(a14, a23); \
		MOV64(a23, a41); \
		MOV64(a41, a32); \
		MOV64(a32, t); \
	} while (0)

#define P8_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a11); \
		MOV64(a11, a43); \
		MOV64(a43, t); \
		MOV64(t, a02); \
		MOV64(a02, a22); \
		MOV64(a22, a31); \
		MOV64(a31, t); \
		MOV64(t, a03); \
		MOV64(a03, a33); \
		MOV64(a33, a24); \
		MOV64(a24, t); \
		MOV64(t, a04); \
		MOV64(a04, a44); \
		MOV64(a44, a12); \
		MOV64(a12, t); \
		MOV64(t, a10); \
		MOV64(a10, a32); \
		MOV64(a32, a13); \
		MOV64(a13, t); \
		MOV64(t, a14); \
		MOV64(a14, a21); \
		MOV64(a21, a20); \
		MOV64(a20, t); \
		MOV64(t, a23); \
		MOV64(a23, a42); \
		MOV64(a42, a40); \
		MOV64(a40, t); \
		MOV64(t, a30); \
		MOV64(a30, a41); \
		MOV64(a41, a34); \
		MOV64(a34, t); \
	} while (0)

#define P12_TO_P0 do { \
		DECL64(t); \
		MOV64(t, a01); \
		MOV64(a01, a04); \
		MOV64(a04, t); \
		MOV64(t, a02); \
		MOV64(a02, a03); \
		MOV64(a03, t); \
		MOV64(t, a10); \
		MOV64(a10, a40); \
		MOV64(a40, t); \
		MOV64(t, a11); \
		MOV64(a11, a44); \
		MOV64(a44, t); \
		MOV64(t, a12); \
		MOV64(a12, a43); \
		MOV64(a43, t); \
		MOV64(t, a13); \
		MOV64(a13, a42); \
		MOV64(a42, t); \
		MOV64(t, a14); \
		MOV64(a14, a41); \
		MOV64(a41, t); \
		MOV64(t, a20); \
		MOV64(a20, a30); \
		MOV64(a30, t); \
		MOV64(t, a21); \
		MOV64(a21, a34); \
		MOV64(a34, t); \
		MOV64(t, a22); \
		MOV64(a22, a33); \
		MOV64(a33, t); \
		MOV64(t, a23); \
		MOV64(a23, a32); \
		MOV64(a32, t); \
		MOV64(t, a24); \
		MOV64(a24, a31); \
		MOV64(a31, t); \
	} while (0)

/*
 * LPAR/RPAR let a P ## r list be expanded before being handed to a
 * macro as its argument list (THETA LPAR P0 RPAR).
 */
#define LPAR (
#define RPAR )

/*
 * One Keccak round: theta and rho on the current ordering P ## r, then
 * chi on the next ordering P ## s (which also applies the pi lane
 * permutation implicitly), then iota with round constant k.
 */
#define KF_ELT(r, s, k) do { \
		THETA LPAR P ## r RPAR; \
		RHO LPAR P ## r RPAR; \
		KHI LPAR P ## s RPAR; \
		IOTA(k); \
	} while (0)

/* Extra expansion step so KECCAK_F_1600_ is fully macro-expanded. */
#define DO(x) x

#define KECCAK_F_1600 DO(KECCAK_F_1600_)

/*
 * The full 24-round Keccak-f[1600] permutation, in the unrolling
 * flavor selected by SPH_KECCAK_UNROLL. Each variant runs KF_ELT for
 * its unrolled rounds and then remaps lanes back to the P0 naming;
 * the fully-unrolled (0) variant needs no remap since P24 = P0.
 */
#if SPH_KECCAK_UNROLL == 1

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j ++) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			P1_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 2

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j += 2) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			KF_ELT( 1,  2, RC[j + 1]); \
			P2_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 4

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j += 4) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			KF_ELT( 1,  2, RC[j + 1]); \
			KF_ELT( 2,  3, RC[j + 2]); \
			KF_ELT( 3,  4, RC[j + 3]); \
			P4_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 6

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j += 6) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			KF_ELT( 1,  2, RC[j + 1]); \
			KF_ELT( 2,  3, RC[j + 2]); \
			KF_ELT( 3,  4, RC[j + 3]); \
			KF_ELT( 4,  5, RC[j + 4]); \
			KF_ELT( 5,  6, RC[j + 5]); \
			P6_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 8

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j += 8) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			KF_ELT( 1,  2, RC[j + 1]); \
			KF_ELT( 2,  3, RC[j + 2]); \
			KF_ELT( 3,  4, RC[j + 3]); \
			KF_ELT( 4,  5, RC[j + 4]); \
			KF_ELT( 5,  6, RC[j + 5]); \
			KF_ELT( 6,  7, RC[j + 6]); \
			KF_ELT( 7,  8, RC[j + 7]); \
			P8_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 12

#define KECCAK_F_1600_ do { \
		int j; \
		for (j = 0; j < 24; j += 12) { \
			KF_ELT( 0,  1, RC[j + 0]); \
			KF_ELT( 1,  2, RC[j + 1]); \
			KF_ELT( 2,  3, RC[j + 2]); \
			KF_ELT( 3,  4, RC[j + 3]); \
			KF_ELT( 4,  5, RC[j + 4]); \
			KF_ELT( 5,  6, RC[j + 5]); \
			KF_ELT( 6,  7, RC[j + 6]); \
			KF_ELT( 7,  8, RC[j + 7]); \
			KF_ELT( 8,  9, RC[j + 8]); \
			KF_ELT( 9, 10, RC[j + 9]); \
			KF_ELT(10, 11, RC[j + 10]); \
			KF_ELT(11, 12, RC[j + 11]); \
			P12_TO_P0; \
		} \
	} while (0)

#elif SPH_KECCAK_UNROLL == 0

#define KECCAK_F_1600_ do { \
		KF_ELT( 0,  1, RC[ 0]); \
		KF_ELT( 1,  2, RC[ 1]); \
		KF_ELT( 2,  3, RC[ 2]); \
		KF_ELT( 3,  4, RC[ 3]); \
		KF_ELT( 4,  5, RC[ 4]); \
		KF_ELT( 5,  6, RC[ 5]); \
		KF_ELT( 6,  7, RC[ 6]); \
		KF_ELT( 7,  8, RC[ 7]); \
		KF_ELT( 8,  9, RC[ 8]); \
		KF_ELT( 9, 10, RC[ 9]); \
		KF_ELT(10, 11, RC[10]); \
		KF_ELT(11, 12, RC[11]); \
		KF_ELT(12, 13, RC[12]); \
		KF_ELT(13, 14, RC[13]); \
		KF_ELT(14, 15, RC[14]); \
		KF_ELT(15, 16, RC[15]); \
		KF_ELT(16, 17, RC[16]); \
		KF_ELT(17, 18, RC[17]); \
		KF_ELT(18, 19, RC[18]); \
		KF_ELT(19, 20, RC[19]); \
		KF_ELT(20, 21, RC[20]); \
		KF_ELT(21, 22, RC[21]); \
		KF_ELT(22, 23, RC[22]); \
		KF_ELT(23,  0, RC[23]); \
	} while (0)

#else

#error Unimplemented unroll count for Keccak.

#endif

/*
 * Initialize a Keccak context for an output of out_size bits.
 * Zeroes the 1600-bit state, sets the lane-complement words to
 * all-ones (matching the complement mask finalized in keccak_close),
 * and computes the rate in bytes: lim = 200 - out_size/4, i.e.
 * capacity = 2 * out_size bits.
 */
static void
keccak_init(sph_keccak_context *kc, unsigned out_size)
{
	int i;

#if SPH_KECCAK_64
	for (i = 0; i < 25; i ++)
		kc->u.wide[i] = 0;
	/*
	 * Initialization for the "lane complement".
	 */
	kc->u.wide[ 1] = SPH_C64(0xFFFFFFFFFFFFFFFF);
	kc->u.wide[ 2] = SPH_C64(0xFFFFFFFFFFFFFFFF);
	kc->u.wide[ 8] = SPH_C64(0xFFFFFFFFFFFFFFFF);
	kc->u.wide[12] = SPH_C64(0xFFFFFFFFFFFFFFFF);
	kc->u.wide[17] = SPH_C64(0xFFFFFFFFFFFFFFFF);
	kc->u.wide[20] = SPH_C64(0xFFFFFFFFFFFFFFFF);
#else

	for (i = 0; i < 50; i ++)
		kc->u.narrow[i] = 0;
	/*
	 * Initialization for the "lane complement".
	 * Note: since we set to all-one full 64-bit words,
	 * interleaving (if applicable) is a no-op.
	 */
	kc->u.narrow[ 2] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[ 3] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[ 4] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[ 5] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[16] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[17] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[24] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[25] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[34] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[35] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[40] = SPH_C32(0xFFFFFFFF);
	kc->u.narrow[41] = SPH_C32(0xFFFFFFFF);
#endif
	kc->ptr = 0;
	kc->lim = 200 - (out_size >> 2);
}

/*
 * Absorb 'len' bytes of input. Data is accumulated in kc->buf; each
 * time a full rate-sized block ('lim' bytes) is gathered, it is XORed
 * into the state (INPUT_BUF) and Keccak-f[1600] is applied. The state
 * is read into locals once and written back once per call.
 */
static void
keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
{
	unsigned char *buf;
	size_t ptr;
	DECL_STATE

	buf = kc->buf;
	ptr = kc->ptr;

	/* Fast path: input fits in the buffer without completing a block. */
	if (len < (lim - ptr)) {
		memcpy(buf + ptr, data, len);
		kc->ptr = ptr + len;
		return;
	}

	READ_STATE(kc);
	while (len > 0) {
		size_t clen;

		clen = (lim - ptr);
		if (clen > len)
			clen = len;
		memcpy(buf + ptr, data, clen);
		ptr += clen;
		data = (const unsigned char *)data + clen;
		len -= clen;
		if (ptr == lim) {
			INPUT_BUF(lim);
			KECCAK_F_1600;
			ptr = 0;
		}
	}
	WRITE_STATE(kc);
	kc->ptr = ptr;
}

#if SPH_KECCAK_64

/*
 * Define keccak_close<d>: append n extra bits (from ub), apply Keccak
 * padding (pad bits plus a final 0x80 in the last rate byte — with a
 * special case when the pad would not fit in the current block and
 * n == 7), absorb the padding block(s), undo the lane complement,
 * write d output bytes little-endian to dst, and re-initialize the
 * context for a d*8-bit output. 64-bit state variant.
 */
#define DEFCLOSE(d, lim) \
	static void keccak_close ## d( \
		sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
	{ \
		unsigned eb; \
		union { \
			unsigned char tmp[lim + 1]; \
			sph_u64 dummy;   /* for alignment */ \
		} u; \
		size_t j; \
		\
		eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
		if (kc->ptr == (lim - 1)) { \
			if (n == 7) { \
				u.tmp[0] = eb; \
				memset(u.tmp + 1, 0, lim - 1); \
				u.tmp[lim] = 0x80; \
				j = 1 + lim; \
			} else { \
				u.tmp[0] = eb | 0x80; \
				j = 1; \
			} \
		} else { \
			j = lim - kc->ptr; \
			u.tmp[0] = eb; \
			memset(u.tmp + 1, 0, j - 2); \
			u.tmp[j - 1] = 0x80; \
		} \
		keccak_core(kc, u.tmp, j, lim); \
		/* Finalize the "lane complement" */ \
		kc->u.wide[ 1] = ~kc->u.wide[ 1]; \
		kc->u.wide[ 2] = ~kc->u.wide[ 2]; \
		kc->u.wide[ 8] = ~kc->u.wide[ 8]; \
		kc->u.wide[12] = ~kc->u.wide[12]; \
		kc->u.wide[17] = ~kc->u.wide[17]; \
		kc->u.wide[20] = ~kc->u.wide[20]; \
		for (j = 0; j < d; j += 8) \
			sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
		memcpy(dst, u.tmp, d); \
		keccak_init(kc, (unsigned)d << 3); \
	} \

#else

/*
 * Same closing routine for the 32-bit state representation: the lane
 * complement is undone on the split words, the state is un-interleaved
 * (a no-op when interleaving is disabled), and output is written as
 * 32-bit little-endian words.
 */
#define DEFCLOSE(d, lim) \
	static void keccak_close ## d( \
		sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
	{ \
		unsigned eb; \
		union { \
			unsigned char tmp[lim + 1]; \
			sph_u64 dummy;   /* for alignment */ \
		} u; \
		size_t j; \
		\
		eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
		if (kc->ptr == (lim - 1)) { \
			if (n == 7) { \
				u.tmp[0] = eb; \
				memset(u.tmp + 1, 0, lim - 1); \
				u.tmp[lim] = 0x80; \
				j = 1 + lim; \
			} else { \
				u.tmp[0] = eb | 0x80; \
				j = 1; \
			} \
		} else { \
			j = lim - kc->ptr; \
			u.tmp[0] = eb; \
			memset(u.tmp + 1, 0, j - 2); \
			u.tmp[j - 1] = 0x80; \
		} \
		keccak_core(kc, u.tmp, j, lim); \
		/* Finalize the "lane complement" */ \
		kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \
		kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \
		kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \
		kc->u.narrow[ 5] = ~kc->u.narrow[ 5]; \
		kc->u.narrow[16] = ~kc->u.narrow[16]; \
		kc->u.narrow[17] = ~kc->u.narrow[17]; \
		kc->u.narrow[24] = ~kc->u.narrow[24]; \
		kc->u.narrow[25] = ~kc->u.narrow[25]; \
		kc->u.narrow[34] = ~kc->u.narrow[34]; \
		kc->u.narrow[35] = ~kc->u.narrow[35]; \
		kc->u.narrow[40] = ~kc->u.narrow[40]; \
		kc->u.narrow[41] = ~kc->u.narrow[41]; \
		/* un-interleave */ \
		for (j = 0; j < 50; j += 2) \
			UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
		for (j = 0; j < d; j += 4) \
			sph_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \
		memcpy(dst, u.tmp, d); \
		keccak_init(kc, (unsigned)d << 3); \
	} \

#endif

/* Instantiate the close functions: (output bytes, rate in bytes). */
DEFCLOSE(28, 144)
DEFCLOSE(32, 136)
DEFCLOSE(48, 104)
DEFCLOSE(64, 72)

/* see sph_keccak.h */
void
sph_keccak224_init(void *cc)
{
	keccak_init(cc, 224);
}

/* see sph_keccak.h */
void
sph_keccak224(void *cc, const void *data, size_t len)
{
	keccak_core(cc, data, len, 144);
}

/* see sph_keccak.h */
void
sph_keccak224_close(void *cc, void *dst)
{
	sph_keccak224_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	keccak_close28(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak256_init(void *cc)
{
	keccak_init(cc, 256);
}

/* see sph_keccak.h */
void
sph_keccak256(void *cc, const void *data, size_t len)
{
	keccak_core(cc, data, len, 136);
}

/* see sph_keccak.h */
void
sph_keccak256_close(void *cc, void *dst)
{
	sph_keccak256_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	keccak_close32(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak384_init(void *cc)
{
	keccak_init(cc, 384);
}

/* see sph_keccak.h */
void
sph_keccak384(void *cc, const void *data, size_t len)
{
	keccak_core(cc, data, len, 104);
}

/* see sph_keccak.h */
void
sph_keccak384_close(void *cc, void *dst)
{
	sph_keccak384_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	keccak_close48(cc, ub, n, dst);
}

/* see sph_keccak.h */
void
sph_keccak512_init(void *cc)
{
	keccak_init(cc, 512);
}

/* see sph_keccak.h */
void
sph_keccak512(void *cc, const void *data, size_t len)
{
	keccak_core(cc, data, len, 72);
}

/* see sph_keccak.h */
void
sph_keccak512_close(void *cc, void *dst)
{
	sph_keccak512_addbits_and_close(cc, 0, 0, dst);
}

/* see sph_keccak.h */
void
sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	keccak_close64(cc, ub, n, dst);
}


#ifdef __cplusplus
}
#endif