/* Path: blob/master/sha3/sph_shavite.c */
/* 1299 views */
/* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */1/*2* SHAvite-3 implementation.3*4* ==========================(LICENSE BEGIN)============================5*6* Copyright (c) 2007-2010 Projet RNRT SAPHIR7*8* Permission is hereby granted, free of charge, to any person obtaining9* a copy of this software and associated documentation files (the10* "Software"), to deal in the Software without restriction, including11* without limitation the rights to use, copy, modify, merge, publish,12* distribute, sublicense, and/or sell copies of the Software, and to13* permit persons to whom the Software is furnished to do so, subject to14* the following conditions:15*16* The above copyright notice and this permission notice shall be17* included in all copies or substantial portions of the Software.18*19* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,20* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF21* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.22* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY23* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,24* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE25* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.26*27* ===========================(LICENSE END)=============================28*29* @author Thomas Pornin <[email protected]>30*/3132#include <stddef.h>33#include <string.h>3435#include "sph_shavite.h"3637#ifdef __cplusplus38extern "C"{39#endif4041#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHAVITE42#define SPH_SMALL_FOOTPRINT_SHAVITE 143#endif4445#ifdef _MSC_VER46#pragma warning (disable: 4146)47#endif4849#define C32 SPH_C325051/*52* As of round 2 of the SHA-3 competition, the published reference53* implementation and test vectors are wrong, because they use54* big-endian AES tables while the internal decoding uses little-endian.55* The code below follows the specification. 
To turn it into a code56* which follows the reference implementation (the one called "BugFix"57* on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out58* the code below (from the '#define AES_BIG_ENDIAN...' to the definition59* of the AES_ROUND_NOKEY macro) and replace it with the version which60* is commented out afterwards.61*/6263#define AES_BIG_ENDIAN 064#include "aes_helper.c"6566static const sph_u32 IV224[] = {67C32(0x6774F31C), C32(0x990AE210), C32(0xC87D4274), C32(0xC9546371),68C32(0x62B2AEA8), C32(0x4B5801D8), C32(0x1B702860), C32(0x842F3017)69};7071static const sph_u32 IV256[] = {72C32(0x49BB3E47), C32(0x2674860D), C32(0xA8B392AC), C32(0x021AC4E6),73C32(0x409283CF), C32(0x620E5D86), C32(0x6D929DCB), C32(0x96CC2A8B)74};7576static const sph_u32 IV384[] = {77C32(0x83DF1545), C32(0xF9AAEC13), C32(0xF4803CB0), C32(0x11FE1F47),78C32(0xDA6CD269), C32(0x4F53FCD7), C32(0x950529A2), C32(0x97908147),79C32(0xB0A4D7AF), C32(0x2B9132BF), C32(0x226E607D), C32(0x3C0F8D7C),80C32(0x487B3F0F), C32(0x04363E22), C32(0x0155C99C), C32(0xEC2E20D3)81};8283static const sph_u32 IV512[] = {84C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),85C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),86C32(0x8E45D73D), C32(0x681AB538), C32(0xBDE86578), C32(0xDD577E47),87C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)88};8990#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \91sph_u32 t0 = (x0); \92sph_u32 t1 = (x1); \93sph_u32 t2 = (x2); \94sph_u32 t3 = (x3); \95AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \96} while (0)9798/*99* This is the code needed to match the "reference implementation" as100* published on Nov 23rd, 2009, instead of the published specification.101*102103#define AES_BIG_ENDIAN 1104#include "aes_helper.c"105106static const sph_u32 IV224[] = {107C32(0xC4C67795), C32(0xC0B1817F), C32(0xEAD88924), C32(0x1ABB1BB0),108C32(0xE0C29152), C32(0xBDE046BA), C32(0xAEEECF99), C32(0x58D509D8)109};110111static const 
sph_u32 IV256[] = {112C32(0x3EECF551), C32(0xBF10819B), C32(0xE6DC8559), C32(0xF3E23FD5),113C32(0x431AEC73), C32(0x79E3F731), C32(0x98325F05), C32(0xA92A31F1)114};115116static const sph_u32 IV384[] = {117C32(0x71F48510), C32(0xA903A8AC), C32(0xFE3216DD), C32(0x0B2D2AD4),118C32(0x6672900A), C32(0x41032819), C32(0x15A7D780), C32(0xB3CAB8D9),119C32(0x34EF4711), C32(0xDE019FE8), C32(0x4D674DC4), C32(0xE056D96B),120C32(0xA35C016B), C32(0xDD903BA7), C32(0x8C1B09B4), C32(0x2C3E9F25)121};122123static const sph_u32 IV512[] = {124C32(0xD5652B63), C32(0x25F1E6EA), C32(0xB18F48FA), C32(0xA1EE3A47),125C32(0xC8B67B07), C32(0xBDCE48D3), C32(0xE3937B78), C32(0x05DB5186),126C32(0x613BE326), C32(0xA11FA303), C32(0x90C833D4), C32(0x79CEE316),127C32(0x1E1AF00F), C32(0x2829B165), C32(0x23B25F80), C32(0x21E11499)128};129130#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \131sph_u32 t0 = (x0); \132sph_u32 t1 = (x1); \133sph_u32 t2 = (x2); \134sph_u32 t3 = (x3); \135AES_ROUND_NOKEY_BE(t0, t1, t2, t3, x0, x1, x2, x3); \136} while (0)137138*/139140#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \141sph_u32 kt; \142AES_ROUND_NOKEY(k1, k2, k3, k0); \143kt = (k0); \144(k0) = (k1); \145(k1) = (k2); \146(k2) = (k3); \147(k3) = kt; \148} while (0)149150#if SPH_SMALL_FOOTPRINT_SHAVITE151152/*153* This function assumes that "msg" is aligned for 32-bit access.154*/155static void156c256(sph_shavite_small_context *sc, const void *msg)157{158sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;159sph_u32 rk[144];160size_t u;161int r, s;162163#if SPH_LITTLE_ENDIAN164memcpy(rk, msg, 64);165#else166for (u = 0; u < 16; u += 4) {167rk[u + 0] = sph_dec32le_aligned(168(const unsigned char *)msg + (u << 2) + 0);169rk[u + 1] = sph_dec32le_aligned(170(const unsigned char *)msg + (u << 2) + 4);171rk[u + 2] = sph_dec32le_aligned(172(const unsigned char *)msg + (u << 2) + 8);173rk[u + 3] = sph_dec32le_aligned(174(const unsigned char *)msg + (u << 2) + 12);175}176#endif177u = 16;178for (r = 0; r < 4; r ++) {179for (s = 0; s < 2; s ++) 
{180sph_u32 x0, x1, x2, x3;181182x0 = rk[u - 15];183x1 = rk[u - 14];184x2 = rk[u - 13];185x3 = rk[u - 16];186AES_ROUND_NOKEY(x0, x1, x2, x3);187rk[u + 0] = x0 ^ rk[u - 4];188rk[u + 1] = x1 ^ rk[u - 3];189rk[u + 2] = x2 ^ rk[u - 2];190rk[u + 3] = x3 ^ rk[u - 1];191if (u == 16) {192rk[ 16] ^= sc->count0;193rk[ 17] ^= SPH_T32(~sc->count1);194} else if (u == 56) {195rk[ 57] ^= sc->count1;196rk[ 58] ^= SPH_T32(~sc->count0);197}198u += 4;199200x0 = rk[u - 15];201x1 = rk[u - 14];202x2 = rk[u - 13];203x3 = rk[u - 16];204AES_ROUND_NOKEY(x0, x1, x2, x3);205rk[u + 0] = x0 ^ rk[u - 4];206rk[u + 1] = x1 ^ rk[u - 3];207rk[u + 2] = x2 ^ rk[u - 2];208rk[u + 3] = x3 ^ rk[u - 1];209if (u == 84) {210rk[ 86] ^= sc->count1;211rk[ 87] ^= SPH_T32(~sc->count0);212} else if (u == 124) {213rk[124] ^= sc->count0;214rk[127] ^= SPH_T32(~sc->count1);215}216u += 4;217}218for (s = 0; s < 4; s ++) {219rk[u + 0] = rk[u - 16] ^ rk[u - 3];220rk[u + 1] = rk[u - 15] ^ rk[u - 2];221rk[u + 2] = rk[u - 14] ^ rk[u - 1];222rk[u + 3] = rk[u - 13] ^ rk[u - 0];223u += 4;224}225}226227p0 = sc->h[0x0];228p1 = sc->h[0x1];229p2 = sc->h[0x2];230p3 = sc->h[0x3];231p4 = sc->h[0x4];232p5 = sc->h[0x5];233p6 = sc->h[0x6];234p7 = sc->h[0x7];235u = 0;236for (r = 0; r < 6; r ++) {237sph_u32 x0, x1, x2, x3;238239x0 = p4 ^ rk[u ++];240x1 = p5 ^ rk[u ++];241x2 = p6 ^ rk[u ++];242x3 = p7 ^ rk[u ++];243AES_ROUND_NOKEY(x0, x1, x2, x3);244x0 ^= rk[u ++];245x1 ^= rk[u ++];246x2 ^= rk[u ++];247x3 ^= rk[u ++];248AES_ROUND_NOKEY(x0, x1, x2, x3);249x0 ^= rk[u ++];250x1 ^= rk[u ++];251x2 ^= rk[u ++];252x3 ^= rk[u ++];253AES_ROUND_NOKEY(x0, x1, x2, x3);254p0 ^= x0;255p1 ^= x1;256p2 ^= x2;257p3 ^= x3;258259x0 = p0 ^ rk[u ++];260x1 = p1 ^ rk[u ++];261x2 = p2 ^ rk[u ++];262x3 = p3 ^ rk[u ++];263AES_ROUND_NOKEY(x0, x1, x2, x3);264x0 ^= rk[u ++];265x1 ^= rk[u ++];266x2 ^= rk[u ++];267x3 ^= rk[u ++];268AES_ROUND_NOKEY(x0, x1, x2, x3);269x0 ^= rk[u ++];270x1 ^= rk[u ++];271x2 ^= rk[u ++];272x3 ^= rk[u ++];273AES_ROUND_NOKEY(x0, x1, x2, 
x3);274p4 ^= x0;275p5 ^= x1;276p6 ^= x2;277p7 ^= x3;278}279sc->h[0x0] ^= p0;280sc->h[0x1] ^= p1;281sc->h[0x2] ^= p2;282sc->h[0x3] ^= p3;283sc->h[0x4] ^= p4;284sc->h[0x5] ^= p5;285sc->h[0x6] ^= p6;286sc->h[0x7] ^= p7;287}288289#else290291/*292* This function assumes that "msg" is aligned for 32-bit access.293*/294static void295c256(sph_shavite_small_context *sc, const void *msg)296{297sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;298sph_u32 x0, x1, x2, x3;299sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;300sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;301302p0 = sc->h[0x0];303p1 = sc->h[0x1];304p2 = sc->h[0x2];305p3 = sc->h[0x3];306p4 = sc->h[0x4];307p5 = sc->h[0x5];308p6 = sc->h[0x6];309p7 = sc->h[0x7];310/* round 0 */311rk0 = sph_dec32le_aligned((const unsigned char *)msg + 0);312x0 = p4 ^ rk0;313rk1 = sph_dec32le_aligned((const unsigned char *)msg + 4);314x1 = p5 ^ rk1;315rk2 = sph_dec32le_aligned((const unsigned char *)msg + 8);316x2 = p6 ^ rk2;317rk3 = sph_dec32le_aligned((const unsigned char *)msg + 12);318x3 = p7 ^ rk3;319AES_ROUND_NOKEY(x0, x1, x2, x3);320rk4 = sph_dec32le_aligned((const unsigned char *)msg + 16);321x0 ^= rk4;322rk5 = sph_dec32le_aligned((const unsigned char *)msg + 20);323x1 ^= rk5;324rk6 = sph_dec32le_aligned((const unsigned char *)msg + 24);325x2 ^= rk6;326rk7 = sph_dec32le_aligned((const unsigned char *)msg + 28);327x3 ^= rk7;328AES_ROUND_NOKEY(x0, x1, x2, x3);329rk8 = sph_dec32le_aligned((const unsigned char *)msg + 32);330x0 ^= rk8;331rk9 = sph_dec32le_aligned((const unsigned char *)msg + 36);332x1 ^= rk9;333rkA = sph_dec32le_aligned((const unsigned char *)msg + 40);334x2 ^= rkA;335rkB = sph_dec32le_aligned((const unsigned char *)msg + 44);336x3 ^= rkB;337AES_ROUND_NOKEY(x0, x1, x2, x3);338p0 ^= x0;339p1 ^= x1;340p2 ^= x2;341p3 ^= x3;342/* round 1 */343rkC = sph_dec32le_aligned((const unsigned char *)msg + 48);344x0 = p0 ^ rkC;345rkD = sph_dec32le_aligned((const unsigned char *)msg + 52);346x1 = p1 ^ rkD;347rkE = sph_dec32le_aligned((const 
unsigned char *)msg + 56);348x2 = p2 ^ rkE;349rkF = sph_dec32le_aligned((const unsigned char *)msg + 60);350x3 = p3 ^ rkF;351AES_ROUND_NOKEY(x0, x1, x2, x3);352KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);353rk0 ^= rkC ^ sc->count0;354rk1 ^= rkD ^ SPH_T32(~sc->count1);355rk2 ^= rkE;356rk3 ^= rkF;357x0 ^= rk0;358x1 ^= rk1;359x2 ^= rk2;360x3 ^= rk3;361AES_ROUND_NOKEY(x0, x1, x2, x3);362KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);363rk4 ^= rk0;364rk5 ^= rk1;365rk6 ^= rk2;366rk7 ^= rk3;367x0 ^= rk4;368x1 ^= rk5;369x2 ^= rk6;370x3 ^= rk7;371AES_ROUND_NOKEY(x0, x1, x2, x3);372p4 ^= x0;373p5 ^= x1;374p6 ^= x2;375p7 ^= x3;376/* round 2 */377KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);378rk8 ^= rk4;379rk9 ^= rk5;380rkA ^= rk6;381rkB ^= rk7;382x0 = p4 ^ rk8;383x1 = p5 ^ rk9;384x2 = p6 ^ rkA;385x3 = p7 ^ rkB;386AES_ROUND_NOKEY(x0, x1, x2, x3);387KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);388rkC ^= rk8;389rkD ^= rk9;390rkE ^= rkA;391rkF ^= rkB;392x0 ^= rkC;393x1 ^= rkD;394x2 ^= rkE;395x3 ^= rkF;396AES_ROUND_NOKEY(x0, x1, x2, x3);397rk0 ^= rkD;398x0 ^= rk0;399rk1 ^= rkE;400x1 ^= rk1;401rk2 ^= rkF;402x2 ^= rk2;403rk3 ^= rk0;404x3 ^= rk3;405AES_ROUND_NOKEY(x0, x1, x2, x3);406p0 ^= x0;407p1 ^= x1;408p2 ^= x2;409p3 ^= x3;410/* round 3 */411rk4 ^= rk1;412x0 = p0 ^ rk4;413rk5 ^= rk2;414x1 = p1 ^ rk5;415rk6 ^= rk3;416x2 = p2 ^ rk6;417rk7 ^= rk4;418x3 = p3 ^ rk7;419AES_ROUND_NOKEY(x0, x1, x2, x3);420rk8 ^= rk5;421x0 ^= rk8;422rk9 ^= rk6;423x1 ^= rk9;424rkA ^= rk7;425x2 ^= rkA;426rkB ^= rk8;427x3 ^= rkB;428AES_ROUND_NOKEY(x0, x1, x2, x3);429rkC ^= rk9;430x0 ^= rkC;431rkD ^= rkA;432x1 ^= rkD;433rkE ^= rkB;434x2 ^= rkE;435rkF ^= rkC;436x3 ^= rkF;437AES_ROUND_NOKEY(x0, x1, x2, x3);438p4 ^= x0;439p5 ^= x1;440p6 ^= x2;441p7 ^= x3;442/* round 4 */443KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);444rk0 ^= rkC;445rk1 ^= rkD;446rk2 ^= rkE;447rk3 ^= rkF;448x0 = p4 ^ rk0;449x1 = p5 ^ rk1;450x2 = p6 ^ rk2;451x3 = p7 ^ rk3;452AES_ROUND_NOKEY(x0, x1, x2, x3);453KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);454rk4 ^= rk0;455rk5 ^= rk1;456rk6 ^= rk2;457rk7 ^= 
rk3;458x0 ^= rk4;459x1 ^= rk5;460x2 ^= rk6;461x3 ^= rk7;462AES_ROUND_NOKEY(x0, x1, x2, x3);463KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);464rk8 ^= rk4;465rk9 ^= rk5 ^ sc->count1;466rkA ^= rk6 ^ SPH_T32(~sc->count0);467rkB ^= rk7;468x0 ^= rk8;469x1 ^= rk9;470x2 ^= rkA;471x3 ^= rkB;472AES_ROUND_NOKEY(x0, x1, x2, x3);473p0 ^= x0;474p1 ^= x1;475p2 ^= x2;476p3 ^= x3;477/* round 5 */478KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);479rkC ^= rk8;480rkD ^= rk9;481rkE ^= rkA;482rkF ^= rkB;483x0 = p0 ^ rkC;484x1 = p1 ^ rkD;485x2 = p2 ^ rkE;486x3 = p3 ^ rkF;487AES_ROUND_NOKEY(x0, x1, x2, x3);488rk0 ^= rkD;489x0 ^= rk0;490rk1 ^= rkE;491x1 ^= rk1;492rk2 ^= rkF;493x2 ^= rk2;494rk3 ^= rk0;495x3 ^= rk3;496AES_ROUND_NOKEY(x0, x1, x2, x3);497rk4 ^= rk1;498x0 ^= rk4;499rk5 ^= rk2;500x1 ^= rk5;501rk6 ^= rk3;502x2 ^= rk6;503rk7 ^= rk4;504x3 ^= rk7;505AES_ROUND_NOKEY(x0, x1, x2, x3);506p4 ^= x0;507p5 ^= x1;508p6 ^= x2;509p7 ^= x3;510/* round 6 */511rk8 ^= rk5;512x0 = p4 ^ rk8;513rk9 ^= rk6;514x1 = p5 ^ rk9;515rkA ^= rk7;516x2 = p6 ^ rkA;517rkB ^= rk8;518x3 = p7 ^ rkB;519AES_ROUND_NOKEY(x0, x1, x2, x3);520rkC ^= rk9;521x0 ^= rkC;522rkD ^= rkA;523x1 ^= rkD;524rkE ^= rkB;525x2 ^= rkE;526rkF ^= rkC;527x3 ^= rkF;528AES_ROUND_NOKEY(x0, x1, x2, x3);529KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);530rk0 ^= rkC;531rk1 ^= rkD;532rk2 ^= rkE;533rk3 ^= rkF;534x0 ^= rk0;535x1 ^= rk1;536x2 ^= rk2;537x3 ^= rk3;538AES_ROUND_NOKEY(x0, x1, x2, x3);539p0 ^= x0;540p1 ^= x1;541p2 ^= x2;542p3 ^= x3;543/* round 7 */544KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);545rk4 ^= rk0;546rk5 ^= rk1;547rk6 ^= rk2 ^ sc->count1;548rk7 ^= rk3 ^ SPH_T32(~sc->count0);549x0 = p0 ^ rk4;550x1 = p1 ^ rk5;551x2 = p2 ^ rk6;552x3 = p3 ^ rk7;553AES_ROUND_NOKEY(x0, x1, x2, x3);554KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);555rk8 ^= rk4;556rk9 ^= rk5;557rkA ^= rk6;558rkB ^= rk7;559x0 ^= rk8;560x1 ^= rk9;561x2 ^= rkA;562x3 ^= rkB;563AES_ROUND_NOKEY(x0, x1, x2, x3);564KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);565rkC ^= rk8;566rkD ^= rk9;567rkE ^= rkA;568rkF ^= rkB;569x0 ^= rkC;570x1 ^= 
rkD;571x2 ^= rkE;572x3 ^= rkF;573AES_ROUND_NOKEY(x0, x1, x2, x3);574p4 ^= x0;575p5 ^= x1;576p6 ^= x2;577p7 ^= x3;578/* round 8 */579rk0 ^= rkD;580x0 = p4 ^ rk0;581rk1 ^= rkE;582x1 = p5 ^ rk1;583rk2 ^= rkF;584x2 = p6 ^ rk2;585rk3 ^= rk0;586x3 = p7 ^ rk3;587AES_ROUND_NOKEY(x0, x1, x2, x3);588rk4 ^= rk1;589x0 ^= rk4;590rk5 ^= rk2;591x1 ^= rk5;592rk6 ^= rk3;593x2 ^= rk6;594rk7 ^= rk4;595x3 ^= rk7;596AES_ROUND_NOKEY(x0, x1, x2, x3);597rk8 ^= rk5;598x0 ^= rk8;599rk9 ^= rk6;600x1 ^= rk9;601rkA ^= rk7;602x2 ^= rkA;603rkB ^= rk8;604x3 ^= rkB;605AES_ROUND_NOKEY(x0, x1, x2, x3);606p0 ^= x0;607p1 ^= x1;608p2 ^= x2;609p3 ^= x3;610/* round 9 */611rkC ^= rk9;612x0 = p0 ^ rkC;613rkD ^= rkA;614x1 = p1 ^ rkD;615rkE ^= rkB;616x2 = p2 ^ rkE;617rkF ^= rkC;618x3 = p3 ^ rkF;619AES_ROUND_NOKEY(x0, x1, x2, x3);620KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);621rk0 ^= rkC;622rk1 ^= rkD;623rk2 ^= rkE;624rk3 ^= rkF;625x0 ^= rk0;626x1 ^= rk1;627x2 ^= rk2;628x3 ^= rk3;629AES_ROUND_NOKEY(x0, x1, x2, x3);630KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);631rk4 ^= rk0;632rk5 ^= rk1;633rk6 ^= rk2;634rk7 ^= rk3;635x0 ^= rk4;636x1 ^= rk5;637x2 ^= rk6;638x3 ^= rk7;639AES_ROUND_NOKEY(x0, x1, x2, x3);640p4 ^= x0;641p5 ^= x1;642p6 ^= x2;643p7 ^= x3;644/* round 10 */645KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);646rk8 ^= rk4;647rk9 ^= rk5;648rkA ^= rk6;649rkB ^= rk7;650x0 = p4 ^ rk8;651x1 = p5 ^ rk9;652x2 = p6 ^ rkA;653x3 = p7 ^ rkB;654AES_ROUND_NOKEY(x0, x1, x2, x3);655KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);656rkC ^= rk8 ^ sc->count0;657rkD ^= rk9;658rkE ^= rkA;659rkF ^= rkB ^ SPH_T32(~sc->count1);660x0 ^= rkC;661x1 ^= rkD;662x2 ^= rkE;663x3 ^= rkF;664AES_ROUND_NOKEY(x0, x1, x2, x3);665rk0 ^= rkD;666x0 ^= rk0;667rk1 ^= rkE;668x1 ^= rk1;669rk2 ^= rkF;670x2 ^= rk2;671rk3 ^= rk0;672x3 ^= rk3;673AES_ROUND_NOKEY(x0, x1, x2, x3);674p0 ^= x0;675p1 ^= x1;676p2 ^= x2;677p3 ^= x3;678/* round 11 */679rk4 ^= rk1;680x0 = p0 ^ rk4;681rk5 ^= rk2;682x1 = p1 ^ rk5;683rk6 ^= rk3;684x2 = p2 ^ rk6;685rk7 ^= rk4;686x3 = p3 ^ rk7;687AES_ROUND_NOKEY(x0, x1, 
x2, x3);688rk8 ^= rk5;689x0 ^= rk8;690rk9 ^= rk6;691x1 ^= rk9;692rkA ^= rk7;693x2 ^= rkA;694rkB ^= rk8;695x3 ^= rkB;696AES_ROUND_NOKEY(x0, x1, x2, x3);697rkC ^= rk9;698x0 ^= rkC;699rkD ^= rkA;700x1 ^= rkD;701rkE ^= rkB;702x2 ^= rkE;703rkF ^= rkC;704x3 ^= rkF;705AES_ROUND_NOKEY(x0, x1, x2, x3);706p4 ^= x0;707p5 ^= x1;708p6 ^= x2;709p7 ^= x3;710sc->h[0x0] ^= p0;711sc->h[0x1] ^= p1;712sc->h[0x2] ^= p2;713sc->h[0x3] ^= p3;714sc->h[0x4] ^= p4;715sc->h[0x5] ^= p5;716sc->h[0x6] ^= p6;717sc->h[0x7] ^= p7;718}719720#endif721722#if SPH_SMALL_FOOTPRINT_SHAVITE723724/*725* This function assumes that "msg" is aligned for 32-bit access.726*/727static void728c512(sph_shavite_big_context *sc, const void *msg)729{730sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;731sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;732sph_u32 rk[448];733size_t u;734int r, s;735736#if SPH_LITTLE_ENDIAN737memcpy(rk, msg, 128);738#else739for (u = 0; u < 32; u += 4) {740rk[u + 0] = sph_dec32le_aligned(741(const unsigned char *)msg + (u << 2) + 0);742rk[u + 1] = sph_dec32le_aligned(743(const unsigned char *)msg + (u << 2) + 4);744rk[u + 2] = sph_dec32le_aligned(745(const unsigned char *)msg + (u << 2) + 8);746rk[u + 3] = sph_dec32le_aligned(747(const unsigned char *)msg + (u << 2) + 12);748}749#endif750u = 32;751for (;;) {752for (s = 0; s < 4; s ++) {753sph_u32 x0, x1, x2, x3;754755x0 = rk[u - 31];756x1 = rk[u - 30];757x2 = rk[u - 29];758x3 = rk[u - 32];759AES_ROUND_NOKEY(x0, x1, x2, x3);760rk[u + 0] = x0 ^ rk[u - 4];761rk[u + 1] = x1 ^ rk[u - 3];762rk[u + 2] = x2 ^ rk[u - 2];763rk[u + 3] = x3 ^ rk[u - 1];764if (u == 32) {765rk[ 32] ^= sc->count0;766rk[ 33] ^= sc->count1;767rk[ 34] ^= sc->count2;768rk[ 35] ^= SPH_T32(~sc->count3);769} else if (u == 440) {770rk[440] ^= sc->count1;771rk[441] ^= sc->count0;772rk[442] ^= sc->count3;773rk[443] ^= SPH_T32(~sc->count2);774}775u += 4;776777x0 = rk[u - 31];778x1 = rk[u - 30];779x2 = rk[u - 29];780x3 = rk[u - 32];781AES_ROUND_NOKEY(x0, x1, x2, x3);782rk[u + 0] = x0 ^ rk[u - 
4];783rk[u + 1] = x1 ^ rk[u - 3];784rk[u + 2] = x2 ^ rk[u - 2];785rk[u + 3] = x3 ^ rk[u - 1];786if (u == 164) {787rk[164] ^= sc->count3;788rk[165] ^= sc->count2;789rk[166] ^= sc->count1;790rk[167] ^= SPH_T32(~sc->count0);791} else if (u == 316) {792rk[316] ^= sc->count2;793rk[317] ^= sc->count3;794rk[318] ^= sc->count0;795rk[319] ^= SPH_T32(~sc->count1);796}797u += 4;798}799if (u == 448)800break;801for (s = 0; s < 8; s ++) {802rk[u + 0] = rk[u - 32] ^ rk[u - 7];803rk[u + 1] = rk[u - 31] ^ rk[u - 6];804rk[u + 2] = rk[u - 30] ^ rk[u - 5];805rk[u + 3] = rk[u - 29] ^ rk[u - 4];806u += 4;807}808}809810p0 = sc->h[0x0];811p1 = sc->h[0x1];812p2 = sc->h[0x2];813p3 = sc->h[0x3];814p4 = sc->h[0x4];815p5 = sc->h[0x5];816p6 = sc->h[0x6];817p7 = sc->h[0x7];818p8 = sc->h[0x8];819p9 = sc->h[0x9];820pA = sc->h[0xA];821pB = sc->h[0xB];822pC = sc->h[0xC];823pD = sc->h[0xD];824pE = sc->h[0xE];825pF = sc->h[0xF];826u = 0;827for (r = 0; r < 14; r ++) {828#define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \829sph_u32 x0, x1, x2, x3; \830x0 = r0 ^ rk[u ++]; \831x1 = r1 ^ rk[u ++]; \832x2 = r2 ^ rk[u ++]; \833x3 = r3 ^ rk[u ++]; \834AES_ROUND_NOKEY(x0, x1, x2, x3); \835x0 ^= rk[u ++]; \836x1 ^= rk[u ++]; \837x2 ^= rk[u ++]; \838x3 ^= rk[u ++]; \839AES_ROUND_NOKEY(x0, x1, x2, x3); \840x0 ^= rk[u ++]; \841x1 ^= rk[u ++]; \842x2 ^= rk[u ++]; \843x3 ^= rk[u ++]; \844AES_ROUND_NOKEY(x0, x1, x2, x3); \845x0 ^= rk[u ++]; \846x1 ^= rk[u ++]; \847x2 ^= rk[u ++]; \848x3 ^= rk[u ++]; \849AES_ROUND_NOKEY(x0, x1, x2, x3); \850l0 ^= x0; \851l1 ^= x1; \852l2 ^= x2; \853l3 ^= x3; \854} while (0)855856#define WROT(a, b, c, d) do { \857sph_u32 t = d; \858d = c; \859c = b; \860b = a; \861a = t; \862} while (0)863864C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);865C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);866867WROT(p0, p4, p8, pC);868WROT(p1, p5, p9, pD);869WROT(p2, p6, pA, pE);870WROT(p3, p7, pB, pF);871872#undef C512_ELT873#undef WROT874}875sc->h[0x0] ^= p0;876sc->h[0x1] ^= p1;877sc->h[0x2] ^= 
p2;878sc->h[0x3] ^= p3;879sc->h[0x4] ^= p4;880sc->h[0x5] ^= p5;881sc->h[0x6] ^= p6;882sc->h[0x7] ^= p7;883sc->h[0x8] ^= p8;884sc->h[0x9] ^= p9;885sc->h[0xA] ^= pA;886sc->h[0xB] ^= pB;887sc->h[0xC] ^= pC;888sc->h[0xD] ^= pD;889sc->h[0xE] ^= pE;890sc->h[0xF] ^= pF;891}892893#else894895/*896* This function assumes that "msg" is aligned for 32-bit access.897*/898static void899c512(sph_shavite_big_context *sc, const void *msg)900{901sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;902sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;903sph_u32 x0, x1, x2, x3;904sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;905sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;906sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;907sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;908int r;909910p0 = sc->h[0x0];911p1 = sc->h[0x1];912p2 = sc->h[0x2];913p3 = sc->h[0x3];914p4 = sc->h[0x4];915p5 = sc->h[0x5];916p6 = sc->h[0x6];917p7 = sc->h[0x7];918p8 = sc->h[0x8];919p9 = sc->h[0x9];920pA = sc->h[0xA];921pB = sc->h[0xB];922pC = sc->h[0xC];923pD = sc->h[0xD];924pE = sc->h[0xE];925pF = sc->h[0xF];926/* round 0 */927rk00 = sph_dec32le_aligned((const unsigned char *)msg + 0);928x0 = p4 ^ rk00;929rk01 = sph_dec32le_aligned((const unsigned char *)msg + 4);930x1 = p5 ^ rk01;931rk02 = sph_dec32le_aligned((const unsigned char *)msg + 8);932x2 = p6 ^ rk02;933rk03 = sph_dec32le_aligned((const unsigned char *)msg + 12);934x3 = p7 ^ rk03;935AES_ROUND_NOKEY(x0, x1, x2, x3);936rk04 = sph_dec32le_aligned((const unsigned char *)msg + 16);937x0 ^= rk04;938rk05 = sph_dec32le_aligned((const unsigned char *)msg + 20);939x1 ^= rk05;940rk06 = sph_dec32le_aligned((const unsigned char *)msg + 24);941x2 ^= rk06;942rk07 = sph_dec32le_aligned((const unsigned char *)msg + 28);943x3 ^= rk07;944AES_ROUND_NOKEY(x0, x1, x2, x3);945rk08 = sph_dec32le_aligned((const unsigned char *)msg + 32);946x0 ^= rk08;947rk09 = sph_dec32le_aligned((const unsigned char *)msg + 36);948x1 ^= rk09;949rk0A = sph_dec32le_aligned((const 
unsigned char *)msg + 40);950x2 ^= rk0A;951rk0B = sph_dec32le_aligned((const unsigned char *)msg + 44);952x3 ^= rk0B;953AES_ROUND_NOKEY(x0, x1, x2, x3);954rk0C = sph_dec32le_aligned((const unsigned char *)msg + 48);955x0 ^= rk0C;956rk0D = sph_dec32le_aligned((const unsigned char *)msg + 52);957x1 ^= rk0D;958rk0E = sph_dec32le_aligned((const unsigned char *)msg + 56);959x2 ^= rk0E;960rk0F = sph_dec32le_aligned((const unsigned char *)msg + 60);961x3 ^= rk0F;962AES_ROUND_NOKEY(x0, x1, x2, x3);963p0 ^= x0;964p1 ^= x1;965p2 ^= x2;966p3 ^= x3;967rk10 = sph_dec32le_aligned((const unsigned char *)msg + 64);968x0 = pC ^ rk10;969rk11 = sph_dec32le_aligned((const unsigned char *)msg + 68);970x1 = pD ^ rk11;971rk12 = sph_dec32le_aligned((const unsigned char *)msg + 72);972x2 = pE ^ rk12;973rk13 = sph_dec32le_aligned((const unsigned char *)msg + 76);974x3 = pF ^ rk13;975AES_ROUND_NOKEY(x0, x1, x2, x3);976rk14 = sph_dec32le_aligned((const unsigned char *)msg + 80);977x0 ^= rk14;978rk15 = sph_dec32le_aligned((const unsigned char *)msg + 84);979x1 ^= rk15;980rk16 = sph_dec32le_aligned((const unsigned char *)msg + 88);981x2 ^= rk16;982rk17 = sph_dec32le_aligned((const unsigned char *)msg + 92);983x3 ^= rk17;984AES_ROUND_NOKEY(x0, x1, x2, x3);985rk18 = sph_dec32le_aligned((const unsigned char *)msg + 96);986x0 ^= rk18;987rk19 = sph_dec32le_aligned((const unsigned char *)msg + 100);988x1 ^= rk19;989rk1A = sph_dec32le_aligned((const unsigned char *)msg + 104);990x2 ^= rk1A;991rk1B = sph_dec32le_aligned((const unsigned char *)msg + 108);992x3 ^= rk1B;993AES_ROUND_NOKEY(x0, x1, x2, x3);994rk1C = sph_dec32le_aligned((const unsigned char *)msg + 112);995x0 ^= rk1C;996rk1D = sph_dec32le_aligned((const unsigned char *)msg + 116);997x1 ^= rk1D;998rk1E = sph_dec32le_aligned((const unsigned char *)msg + 120);999x2 ^= rk1E;1000rk1F = sph_dec32le_aligned((const unsigned char *)msg + 124);1001x3 ^= rk1F;1002AES_ROUND_NOKEY(x0, x1, x2, x3);1003p8 ^= x0;1004p9 ^= x1;1005pA ^= x2;1006pB ^= 
x3;10071008for (r = 0; r < 3; r ++) {1009/* round 1, 5, 9 */1010KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);1011rk00 ^= rk1C;1012rk01 ^= rk1D;1013rk02 ^= rk1E;1014rk03 ^= rk1F;1015if (r == 0) {1016rk00 ^= sc->count0;1017rk01 ^= sc->count1;1018rk02 ^= sc->count2;1019rk03 ^= SPH_T32(~sc->count3);1020}1021x0 = p0 ^ rk00;1022x1 = p1 ^ rk01;1023x2 = p2 ^ rk02;1024x3 = p3 ^ rk03;1025AES_ROUND_NOKEY(x0, x1, x2, x3);1026KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);1027rk04 ^= rk00;1028rk05 ^= rk01;1029rk06 ^= rk02;1030rk07 ^= rk03;1031if (r == 1) {1032rk04 ^= sc->count3;1033rk05 ^= sc->count2;1034rk06 ^= sc->count1;1035rk07 ^= SPH_T32(~sc->count0);1036}1037x0 ^= rk04;1038x1 ^= rk05;1039x2 ^= rk06;1040x3 ^= rk07;1041AES_ROUND_NOKEY(x0, x1, x2, x3);1042KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);1043rk08 ^= rk04;1044rk09 ^= rk05;1045rk0A ^= rk06;1046rk0B ^= rk07;1047x0 ^= rk08;1048x1 ^= rk09;1049x2 ^= rk0A;1050x3 ^= rk0B;1051AES_ROUND_NOKEY(x0, x1, x2, x3);1052KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);1053rk0C ^= rk08;1054rk0D ^= rk09;1055rk0E ^= rk0A;1056rk0F ^= rk0B;1057x0 ^= rk0C;1058x1 ^= rk0D;1059x2 ^= rk0E;1060x3 ^= rk0F;1061AES_ROUND_NOKEY(x0, x1, x2, x3);1062pC ^= x0;1063pD ^= x1;1064pE ^= x2;1065pF ^= x3;1066KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);1067rk10 ^= rk0C;1068rk11 ^= rk0D;1069rk12 ^= rk0E;1070rk13 ^= rk0F;1071x0 = p8 ^ rk10;1072x1 = p9 ^ rk11;1073x2 = pA ^ rk12;1074x3 = pB ^ rk13;1075AES_ROUND_NOKEY(x0, x1, x2, x3);1076KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);1077rk14 ^= rk10;1078rk15 ^= rk11;1079rk16 ^= rk12;1080rk17 ^= rk13;1081x0 ^= rk14;1082x1 ^= rk15;1083x2 ^= rk16;1084x3 ^= rk17;1085AES_ROUND_NOKEY(x0, x1, x2, x3);1086KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);1087rk18 ^= rk14;1088rk19 ^= rk15;1089rk1A ^= rk16;1090rk1B ^= rk17;1091x0 ^= rk18;1092x1 ^= rk19;1093x2 ^= rk1A;1094x3 ^= rk1B;1095AES_ROUND_NOKEY(x0, x1, x2, x3);1096KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);1097rk1C ^= rk18;1098rk1D ^= rk19;1099rk1E ^= rk1A;1100rk1F ^= rk1B;1101if (r == 2) {1102rk1C ^= 
sc->count2;1103rk1D ^= sc->count3;1104rk1E ^= sc->count0;1105rk1F ^= SPH_T32(~sc->count1);1106}1107x0 ^= rk1C;1108x1 ^= rk1D;1109x2 ^= rk1E;1110x3 ^= rk1F;1111AES_ROUND_NOKEY(x0, x1, x2, x3);1112p4 ^= x0;1113p5 ^= x1;1114p6 ^= x2;1115p7 ^= x3;1116/* round 2, 6, 10 */1117rk00 ^= rk19;1118x0 = pC ^ rk00;1119rk01 ^= rk1A;1120x1 = pD ^ rk01;1121rk02 ^= rk1B;1122x2 = pE ^ rk02;1123rk03 ^= rk1C;1124x3 = pF ^ rk03;1125AES_ROUND_NOKEY(x0, x1, x2, x3);1126rk04 ^= rk1D;1127x0 ^= rk04;1128rk05 ^= rk1E;1129x1 ^= rk05;1130rk06 ^= rk1F;1131x2 ^= rk06;1132rk07 ^= rk00;1133x3 ^= rk07;1134AES_ROUND_NOKEY(x0, x1, x2, x3);1135rk08 ^= rk01;1136x0 ^= rk08;1137rk09 ^= rk02;1138x1 ^= rk09;1139rk0A ^= rk03;1140x2 ^= rk0A;1141rk0B ^= rk04;1142x3 ^= rk0B;1143AES_ROUND_NOKEY(x0, x1, x2, x3);1144rk0C ^= rk05;1145x0 ^= rk0C;1146rk0D ^= rk06;1147x1 ^= rk0D;1148rk0E ^= rk07;1149x2 ^= rk0E;1150rk0F ^= rk08;1151x3 ^= rk0F;1152AES_ROUND_NOKEY(x0, x1, x2, x3);1153p8 ^= x0;1154p9 ^= x1;1155pA ^= x2;1156pB ^= x3;1157rk10 ^= rk09;1158x0 = p4 ^ rk10;1159rk11 ^= rk0A;1160x1 = p5 ^ rk11;1161rk12 ^= rk0B;1162x2 = p6 ^ rk12;1163rk13 ^= rk0C;1164x3 = p7 ^ rk13;1165AES_ROUND_NOKEY(x0, x1, x2, x3);1166rk14 ^= rk0D;1167x0 ^= rk14;1168rk15 ^= rk0E;1169x1 ^= rk15;1170rk16 ^= rk0F;1171x2 ^= rk16;1172rk17 ^= rk10;1173x3 ^= rk17;1174AES_ROUND_NOKEY(x0, x1, x2, x3);1175rk18 ^= rk11;1176x0 ^= rk18;1177rk19 ^= rk12;1178x1 ^= rk19;1179rk1A ^= rk13;1180x2 ^= rk1A;1181rk1B ^= rk14;1182x3 ^= rk1B;1183AES_ROUND_NOKEY(x0, x1, x2, x3);1184rk1C ^= rk15;1185x0 ^= rk1C;1186rk1D ^= rk16;1187x1 ^= rk1D;1188rk1E ^= rk17;1189x2 ^= rk1E;1190rk1F ^= rk18;1191x3 ^= rk1F;1192AES_ROUND_NOKEY(x0, x1, x2, x3);1193p0 ^= x0;1194p1 ^= x1;1195p2 ^= x2;1196p3 ^= x3;1197/* round 3, 7, 11 */1198KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);1199rk00 ^= rk1C;1200rk01 ^= rk1D;1201rk02 ^= rk1E;1202rk03 ^= rk1F;1203x0 = p8 ^ rk00;1204x1 = p9 ^ rk01;1205x2 = pA ^ rk02;1206x3 = pB ^ rk03;1207AES_ROUND_NOKEY(x0, x1, x2, x3);1208KEY_EXPAND_ELT(rk04, rk05, rk06, 
rk07);1209rk04 ^= rk00;1210rk05 ^= rk01;1211rk06 ^= rk02;1212rk07 ^= rk03;1213x0 ^= rk04;1214x1 ^= rk05;1215x2 ^= rk06;1216x3 ^= rk07;1217AES_ROUND_NOKEY(x0, x1, x2, x3);1218KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);1219rk08 ^= rk04;1220rk09 ^= rk05;1221rk0A ^= rk06;1222rk0B ^= rk07;1223x0 ^= rk08;1224x1 ^= rk09;1225x2 ^= rk0A;1226x3 ^= rk0B;1227AES_ROUND_NOKEY(x0, x1, x2, x3);1228KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);1229rk0C ^= rk08;1230rk0D ^= rk09;1231rk0E ^= rk0A;1232rk0F ^= rk0B;1233x0 ^= rk0C;1234x1 ^= rk0D;1235x2 ^= rk0E;1236x3 ^= rk0F;1237AES_ROUND_NOKEY(x0, x1, x2, x3);1238p4 ^= x0;1239p5 ^= x1;1240p6 ^= x2;1241p7 ^= x3;1242KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);1243rk10 ^= rk0C;1244rk11 ^= rk0D;1245rk12 ^= rk0E;1246rk13 ^= rk0F;1247x0 = p0 ^ rk10;1248x1 = p1 ^ rk11;1249x2 = p2 ^ rk12;1250x3 = p3 ^ rk13;1251AES_ROUND_NOKEY(x0, x1, x2, x3);1252KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);1253rk14 ^= rk10;1254rk15 ^= rk11;1255rk16 ^= rk12;1256rk17 ^= rk13;1257x0 ^= rk14;1258x1 ^= rk15;1259x2 ^= rk16;1260x3 ^= rk17;1261AES_ROUND_NOKEY(x0, x1, x2, x3);1262KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);1263rk18 ^= rk14;1264rk19 ^= rk15;1265rk1A ^= rk16;1266rk1B ^= rk17;1267x0 ^= rk18;1268x1 ^= rk19;1269x2 ^= rk1A;1270x3 ^= rk1B;1271AES_ROUND_NOKEY(x0, x1, x2, x3);1272KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);1273rk1C ^= rk18;1274rk1D ^= rk19;1275rk1E ^= rk1A;1276rk1F ^= rk1B;1277x0 ^= rk1C;1278x1 ^= rk1D;1279x2 ^= rk1E;1280x3 ^= rk1F;1281AES_ROUND_NOKEY(x0, x1, x2, x3);1282pC ^= x0;1283pD ^= x1;1284pE ^= x2;1285pF ^= x3;1286/* round 4, 8, 12 */1287rk00 ^= rk19;1288x0 = p4 ^ rk00;1289rk01 ^= rk1A;1290x1 = p5 ^ rk01;1291rk02 ^= rk1B;1292x2 = p6 ^ rk02;1293rk03 ^= rk1C;1294x3 = p7 ^ rk03;1295AES_ROUND_NOKEY(x0, x1, x2, x3);1296rk04 ^= rk1D;1297x0 ^= rk04;1298rk05 ^= rk1E;1299x1 ^= rk05;1300rk06 ^= rk1F;1301x2 ^= rk06;1302rk07 ^= rk00;1303x3 ^= rk07;1304AES_ROUND_NOKEY(x0, x1, x2, x3);1305rk08 ^= rk01;1306x0 ^= rk08;1307rk09 ^= rk02;1308x1 ^= rk09;1309rk0A ^= rk03;1310x2 ^= 
rk0A;1311rk0B ^= rk04;1312x3 ^= rk0B;1313AES_ROUND_NOKEY(x0, x1, x2, x3);1314rk0C ^= rk05;1315x0 ^= rk0C;1316rk0D ^= rk06;1317x1 ^= rk0D;1318rk0E ^= rk07;1319x2 ^= rk0E;1320rk0F ^= rk08;1321x3 ^= rk0F;1322AES_ROUND_NOKEY(x0, x1, x2, x3);1323p0 ^= x0;1324p1 ^= x1;1325p2 ^= x2;1326p3 ^= x3;1327rk10 ^= rk09;1328x0 = pC ^ rk10;1329rk11 ^= rk0A;1330x1 = pD ^ rk11;1331rk12 ^= rk0B;1332x2 = pE ^ rk12;1333rk13 ^= rk0C;1334x3 = pF ^ rk13;1335AES_ROUND_NOKEY(x0, x1, x2, x3);1336rk14 ^= rk0D;1337x0 ^= rk14;1338rk15 ^= rk0E;1339x1 ^= rk15;1340rk16 ^= rk0F;1341x2 ^= rk16;1342rk17 ^= rk10;1343x3 ^= rk17;1344AES_ROUND_NOKEY(x0, x1, x2, x3);1345rk18 ^= rk11;1346x0 ^= rk18;1347rk19 ^= rk12;1348x1 ^= rk19;1349rk1A ^= rk13;1350x2 ^= rk1A;1351rk1B ^= rk14;1352x3 ^= rk1B;1353AES_ROUND_NOKEY(x0, x1, x2, x3);1354rk1C ^= rk15;1355x0 ^= rk1C;1356rk1D ^= rk16;1357x1 ^= rk1D;1358rk1E ^= rk17;1359x2 ^= rk1E;1360rk1F ^= rk18;1361x3 ^= rk1F;1362AES_ROUND_NOKEY(x0, x1, x2, x3);1363p8 ^= x0;1364p9 ^= x1;1365pA ^= x2;1366pB ^= x3;1367}1368/* round 13 */1369KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);1370rk00 ^= rk1C;1371rk01 ^= rk1D;1372rk02 ^= rk1E;1373rk03 ^= rk1F;1374x0 = p0 ^ rk00;1375x1 = p1 ^ rk01;1376x2 = p2 ^ rk02;1377x3 = p3 ^ rk03;1378AES_ROUND_NOKEY(x0, x1, x2, x3);1379KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);1380rk04 ^= rk00;1381rk05 ^= rk01;1382rk06 ^= rk02;1383rk07 ^= rk03;1384x0 ^= rk04;1385x1 ^= rk05;1386x2 ^= rk06;1387x3 ^= rk07;1388AES_ROUND_NOKEY(x0, x1, x2, x3);1389KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);1390rk08 ^= rk04;1391rk09 ^= rk05;1392rk0A ^= rk06;1393rk0B ^= rk07;1394x0 ^= rk08;1395x1 ^= rk09;1396x2 ^= rk0A;1397x3 ^= rk0B;1398AES_ROUND_NOKEY(x0, x1, x2, x3);1399KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);1400rk0C ^= rk08;1401rk0D ^= rk09;1402rk0E ^= rk0A;1403rk0F ^= rk0B;1404x0 ^= rk0C;1405x1 ^= rk0D;1406x2 ^= rk0E;1407x3 ^= rk0F;1408AES_ROUND_NOKEY(x0, x1, x2, x3);1409pC ^= x0;1410pD ^= x1;1411pE ^= x2;1412pF ^= x3;1413KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);1414rk10 ^= 
rk0C;1415rk11 ^= rk0D;1416rk12 ^= rk0E;1417rk13 ^= rk0F;1418x0 = p8 ^ rk10;1419x1 = p9 ^ rk11;1420x2 = pA ^ rk12;1421x3 = pB ^ rk13;1422AES_ROUND_NOKEY(x0, x1, x2, x3);1423KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);1424rk14 ^= rk10;1425rk15 ^= rk11;1426rk16 ^= rk12;1427rk17 ^= rk13;1428x0 ^= rk14;1429x1 ^= rk15;1430x2 ^= rk16;1431x3 ^= rk17;1432AES_ROUND_NOKEY(x0, x1, x2, x3);1433KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);1434rk18 ^= rk14 ^ sc->count1;1435rk19 ^= rk15 ^ sc->count0;1436rk1A ^= rk16 ^ sc->count3;1437rk1B ^= rk17 ^ SPH_T32(~sc->count2);1438x0 ^= rk18;1439x1 ^= rk19;1440x2 ^= rk1A;1441x3 ^= rk1B;1442AES_ROUND_NOKEY(x0, x1, x2, x3);1443KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);1444rk1C ^= rk18;1445rk1D ^= rk19;1446rk1E ^= rk1A;1447rk1F ^= rk1B;1448x0 ^= rk1C;1449x1 ^= rk1D;1450x2 ^= rk1E;1451x3 ^= rk1F;1452AES_ROUND_NOKEY(x0, x1, x2, x3);1453p4 ^= x0;1454p5 ^= x1;1455p6 ^= x2;1456p7 ^= x3;1457sc->h[0x0] ^= p8;1458sc->h[0x1] ^= p9;1459sc->h[0x2] ^= pA;1460sc->h[0x3] ^= pB;1461sc->h[0x4] ^= pC;1462sc->h[0x5] ^= pD;1463sc->h[0x6] ^= pE;1464sc->h[0x7] ^= pF;1465sc->h[0x8] ^= p0;1466sc->h[0x9] ^= p1;1467sc->h[0xA] ^= p2;1468sc->h[0xB] ^= p3;1469sc->h[0xC] ^= p4;1470sc->h[0xD] ^= p5;1471sc->h[0xE] ^= p6;1472sc->h[0xF] ^= p7;1473}14741475#endif14761477static void1478shavite_small_init(sph_shavite_small_context *sc, const sph_u32 *iv)1479{1480memcpy(sc->h, iv, sizeof sc->h);1481sc->ptr = 0;1482sc->count0 = 0;1483sc->count1 = 0;1484}14851486static void1487shavite_small_core(sph_shavite_small_context *sc, const void *data, size_t len)1488{1489unsigned char *buf;1490size_t ptr;14911492buf = sc->buf;1493ptr = sc->ptr;1494while (len > 0) {1495size_t clen;14961497clen = (sizeof sc->buf) - ptr;1498if (clen > len)1499clen = len;1500memcpy(buf + ptr, data, clen);1501data = (const unsigned char *)data + clen;1502ptr += clen;1503len -= clen;1504if (ptr == sizeof sc->buf) {1505if ((sc->count0 = SPH_T32(sc->count0 + 512)) == 0)1506sc->count1 = SPH_T32(sc->count1 + 1);1507c256(sc, 
buf);1508ptr = 0;1509}1510}1511sc->ptr = ptr;1512}15131514static void1515shavite_small_close(sph_shavite_small_context *sc,1516unsigned ub, unsigned n, void *dst, size_t out_size_w32)1517{1518unsigned char *buf;1519size_t ptr, u;1520unsigned z;1521sph_u32 count0, count1;15221523buf = sc->buf;1524ptr = sc->ptr;1525count0 = (sc->count0 += SPH_T32(ptr << 3) + n);1526count1 = sc->count1;1527z = 0x80 >> n;1528z = ((ub & -z) | z) & 0xFF;1529if (ptr == 0 && n == 0) {1530buf[0] = 0x80;1531memset(buf + 1, 0, 53);1532sc->count0 = sc->count1 = 0;1533} else if (ptr < 54) {1534buf[ptr ++] = z;1535memset(buf + ptr, 0, 54 - ptr);1536} else {1537buf[ptr ++] = z;1538memset(buf + ptr, 0, 64 - ptr);1539c256(sc, buf);1540memset(buf, 0, 54);1541sc->count0 = sc->count1 = 0;1542}1543sph_enc32le(buf + 54, count0);1544sph_enc32le(buf + 58, count1);1545buf[62] = (unsigned char) (out_size_w32 << 5);1546buf[63] = (unsigned char) (out_size_w32 >> 3);1547c256(sc, buf);1548for (u = 0; u < out_size_w32; u ++)1549sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);1550}15511552static void1553shavite_big_init(sph_shavite_big_context *sc, const sph_u32 *iv)1554{1555memcpy(sc->h, iv, sizeof sc->h);1556sc->ptr = 0;1557sc->count0 = 0;1558sc->count1 = 0;1559sc->count2 = 0;1560sc->count3 = 0;1561}15621563static void1564shavite_big_core(sph_shavite_big_context *sc, const void *data, size_t len)1565{1566unsigned char *buf;1567size_t ptr;15681569buf = sc->buf;1570ptr = sc->ptr;1571while (len > 0) {1572size_t clen;15731574clen = (sizeof sc->buf) - ptr;1575if (clen > len)1576clen = len;1577memcpy(buf + ptr, data, clen);1578data = (const unsigned char *)data + clen;1579ptr += clen;1580len -= clen;1581if (ptr == sizeof sc->buf) {1582if ((sc->count0 = SPH_T32(sc->count0 + 1024)) == 0) {1583sc->count1 = SPH_T32(sc->count1 + 1);1584if (sc->count1 == 0) {1585sc->count2 = SPH_T32(sc->count2 + 1);1586if (sc->count2 == 0) {1587sc->count3 = SPH_T32(1588sc->count3 + 1);1589}1590}1591}1592c512(sc, buf);1593ptr = 
0;1594}1595}1596sc->ptr = ptr;1597}15981599static void1600shavite_big_close(sph_shavite_big_context *sc,1601unsigned ub, unsigned n, void *dst, size_t out_size_w32)1602{1603unsigned char *buf;1604size_t ptr, u;1605unsigned z;1606sph_u32 count0, count1, count2, count3;16071608buf = sc->buf;1609ptr = sc->ptr;1610count0 = (sc->count0 += SPH_T32(ptr << 3) + n);1611count1 = sc->count1;1612count2 = sc->count2;1613count3 = sc->count3;1614z = 0x80 >> n;1615z = ((ub & -z) | z) & 0xFF;1616if (ptr == 0 && n == 0) {1617buf[0] = 0x80;1618memset(buf + 1, 0, 109);1619sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;1620} else if (ptr < 110) {1621buf[ptr ++] = z;1622memset(buf + ptr, 0, 110 - ptr);1623} else {1624buf[ptr ++] = z;1625memset(buf + ptr, 0, 128 - ptr);1626c512(sc, buf);1627memset(buf, 0, 110);1628sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;1629}1630sph_enc32le(buf + 110, count0);1631sph_enc32le(buf + 114, count1);1632sph_enc32le(buf + 118, count2);1633sph_enc32le(buf + 122, count3);1634buf[126] = (unsigned char) (out_size_w32 << 5);1635buf[127] = (unsigned char) (out_size_w32 >> 3);1636c512(sc, buf);1637for (u = 0; u < out_size_w32; u ++)1638sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);1639}16401641/* see sph_shavite.h */1642void1643sph_shavite224_init(void *cc)1644{1645shavite_small_init(cc, IV224);1646}16471648/* see sph_shavite.h */1649void1650sph_shavite224(void *cc, const void *data, size_t len)1651{1652shavite_small_core(cc, data, len);1653}16541655/* see sph_shavite.h */1656void1657sph_shavite224_close(void *cc, void *dst)1658{1659shavite_small_close(cc, 0, 0, dst, 7);1660shavite_small_init(cc, IV224);1661}16621663/* see sph_shavite.h */1664void1665sph_shavite224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)1666{1667shavite_small_close(cc, ub, n, dst, 7);1668shavite_small_init(cc, IV224);1669}16701671/* see sph_shavite.h */1672void1673sph_shavite256_init(void *cc)1674{1675shavite_small_init(cc, IV256);1676}16771678/* 
see sph_shavite.h */1679void1680sph_shavite256(void *cc, const void *data, size_t len)1681{1682shavite_small_core(cc, data, len);1683}16841685/* see sph_shavite.h */1686void1687sph_shavite256_close(void *cc, void *dst)1688{1689shavite_small_close(cc, 0, 0, dst, 8);1690shavite_small_init(cc, IV256);1691}16921693/* see sph_shavite.h */1694void1695sph_shavite256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)1696{1697shavite_small_close(cc, ub, n, dst, 8);1698shavite_small_init(cc, IV256);1699}17001701/* see sph_shavite.h */1702void1703sph_shavite384_init(void *cc)1704{1705shavite_big_init(cc, IV384);1706}17071708/* see sph_shavite.h */1709void1710sph_shavite384(void *cc, const void *data, size_t len)1711{1712shavite_big_core(cc, data, len);1713}17141715/* see sph_shavite.h */1716void1717sph_shavite384_close(void *cc, void *dst)1718{1719shavite_big_close(cc, 0, 0, dst, 12);1720shavite_big_init(cc, IV384);1721}17221723/* see sph_shavite.h */1724void1725sph_shavite384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)1726{1727shavite_big_close(cc, ub, n, dst, 12);1728shavite_big_init(cc, IV384);1729}17301731/* see sph_shavite.h */1732void1733sph_shavite512_init(void *cc)1734{1735shavite_big_init(cc, IV512);1736}17371738/* see sph_shavite.h */1739void1740sph_shavite512(void *cc, const void *data, size_t len)1741{1742shavite_big_core(cc, data, len);1743}17441745/* see sph_shavite.h */1746void1747sph_shavite512_close(void *cc, void *dst)1748{1749shavite_big_close(cc, 0, 0, dst, 16);1750shavite_big_init(cc, IV512);1751}17521753/* see sph_shavite.h */1754void1755sph_shavite512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)1756{1757shavite_big_close(cc, ub, n, dst, 16);1758shavite_big_init(cc, IV512);1759}17601761#ifdef __cplusplus1762}1763#endif17641765