Path: blob/main/contrib/bearssl/src/symcipher/aes_ct_dec.c
39482 views
/*1* Copyright (c) 2016 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "inner.h"2526/* see inner.h */27void28br_aes_ct_bitslice_invSbox(uint32_t *q)29{30/*31* AES S-box is:32* S(x) = A(I(x)) ^ 0x6333* where I() is inversion in GF(256), and A() is a linear34* transform (0 is formally defined to be its own inverse).35* Since inversion is an involution, the inverse S-box can be36* computed from the S-box as:37* iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)38* where B() is the inverse of A(). Indeed, for any y in GF(256):39* iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y40*41* Note: we reuse the implementation of the forward S-box,42* instead of duplicating it here, so that total code size is43* lower. By merging the B() transforms into the S-box circuit44* we could make faster CBC decryption, but CBC decryption is45* already quite faster than CBC encryption because we can46* process two blocks in parallel.47*/48uint32_t q0, q1, q2, q3, q4, q5, q6, q7;4950q0 = ~q[0];51q1 = ~q[1];52q2 = q[2];53q3 = q[3];54q4 = q[4];55q5 = ~q[5];56q6 = ~q[6];57q7 = q[7];58q[7] = q1 ^ q4 ^ q6;59q[6] = q0 ^ q3 ^ q5;60q[5] = q7 ^ q2 ^ q4;61q[4] = q6 ^ q1 ^ q3;62q[3] = q5 ^ q0 ^ q2;63q[2] = q4 ^ q7 ^ q1;64q[1] = q3 ^ q6 ^ q0;65q[0] = q2 ^ q5 ^ q7;6667br_aes_ct_bitslice_Sbox(q);6869q0 = ~q[0];70q1 = ~q[1];71q2 = q[2];72q3 = q[3];73q4 = q[4];74q5 = ~q[5];75q6 = ~q[6];76q7 = q[7];77q[7] = q1 ^ q4 ^ q6;78q[6] = q0 ^ q3 ^ q5;79q[5] = q7 ^ q2 ^ q4;80q[4] = q6 ^ q1 ^ q3;81q[3] = q5 ^ q0 ^ q2;82q[2] = q4 ^ q7 ^ q1;83q[1] = q3 ^ q6 ^ q0;84q[0] = q2 ^ q5 ^ q7;85}8687static void88add_round_key(uint32_t *q, const uint32_t *sk)89{90int i;9192for (i = 0; i < 8; i ++) {93q[i] ^= sk[i];94}95}9697static void98inv_shift_rows(uint32_t *q)99{100int i;101102for (i = 0; i < 8; i ++) {103uint32_t x;104105x = q[i];106q[i] = (x & 0x000000FF)107| ((x & 0x00003F00) << 2) | ((x & 0x0000C000) >> 6)108| ((x & 0x000F0000) << 4) | ((x & 0x00F00000) >> 4)109| ((x & 0x03000000) << 6) | ((x & 0xFC000000) >> 2);110}111}112113static inline uint32_t114rotr16(uint32_t x)115{116return (x << 16) | (x >> 16);117}118119static void120inv_mix_columns(uint32_t *q)121{122uint32_t q0, q1, q2, q3, q4, q5, q6, q7;123uint32_t r0, r1, r2, r3, r4, r5, r6, r7;124125q0 = q[0];126q1 = q[1];127q2 = q[2];128q3 = q[3];129q4 = q[4];130q5 = q[5];131q6 = q[6];132q7 = q[7];133r0 = (q0 >> 8) | (q0 << 24);134r1 = (q1 >> 8) | (q1 << 24);135r2 = (q2 >> 8) | (q2 << 24);136r3 = (q3 >> 8) | (q3 << 24);137r4 = (q4 >> 8) | (q4 << 24);138r5 = (q5 >> 8) | (q5 << 24);139r6 = (q6 >> 8) | (q6 << 24);140r7 = (q7 >> 8) | (q7 << 24);141142q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);143q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);144q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);145q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);146q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);147q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);148q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);149q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);150}151152/* see inner.h */153void154br_aes_ct_bitslice_decrypt(unsigned num_rounds,155const uint32_t *skey, uint32_t *q)156{157unsigned u;158159add_round_key(q, skey + (num_rounds << 3));160for (u = num_rounds - 1; u > 0; u --) {161inv_shift_rows(q);162br_aes_ct_bitslice_invSbox(q);163add_round_key(q, skey + (u << 3));164inv_mix_columns(q);165}166inv_shift_rows(q);167br_aes_ct_bitslice_invSbox(q);168add_round_key(q, skey);169}170171172