/* Path: blob/main/contrib/bearssl/src/symcipher/aes_pwr8_cbcdec.c */
/* 39482 views */
/*1* Copyright (c) 2017 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#define BR_POWER_ASM_MACROS 125#include "inner.h"2627#if BR_POWER82829/* see bearssl_block.h */30void31br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,32const void *key, size_t len)33{34ctx->vtable = &br_aes_pwr8_cbcdec_vtable;35ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);36}3738static void39cbcdec_128(const unsigned char *sk,40const unsigned char *iv, unsigned char *buf, size_t num_blocks)41{42long cc0, cc1, cc2, cc3;4344#if BR_POWER8_LE45static const uint32_t idx2be[] = {460x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C47};48#endif4950cc0 = 0;51cc1 = 16;52cc2 = 32;53cc3 = 48;54asm volatile (5556/*57* Load subkeys into v0..v1058*/59lxvw4x(32, %[cc0], %[sk])60addi(%[cc0], %[cc0], 16)61lxvw4x(33, %[cc0], %[sk])62addi(%[cc0], %[cc0], 16)63lxvw4x(34, %[cc0], %[sk])64addi(%[cc0], %[cc0], 16)65lxvw4x(35, %[cc0], 
%[sk])66addi(%[cc0], %[cc0], 16)67lxvw4x(36, %[cc0], %[sk])68addi(%[cc0], %[cc0], 16)69lxvw4x(37, %[cc0], %[sk])70addi(%[cc0], %[cc0], 16)71lxvw4x(38, %[cc0], %[sk])72addi(%[cc0], %[cc0], 16)73lxvw4x(39, %[cc0], %[sk])74addi(%[cc0], %[cc0], 16)75lxvw4x(40, %[cc0], %[sk])76addi(%[cc0], %[cc0], 16)77lxvw4x(41, %[cc0], %[sk])78addi(%[cc0], %[cc0], 16)79lxvw4x(42, %[cc0], %[sk])80li(%[cc0], 0)8182#if BR_POWER8_LE83/*84* v15 = constant for byteswapping words85*/86lxvw4x(47, 0, %[idx2be])87#endif88/*89* Load IV into v24.90*/91lxvw4x(56, 0, %[iv])92#if BR_POWER8_LE93vperm(24, 24, 24, 15)94#endif9596mtctr(%[num_blocks])97label(loop)98/*99* Load next ciphertext words in v16..v19. Also save them100* in v20..v23.101*/102lxvw4x(48, %[cc0], %[buf])103lxvw4x(49, %[cc1], %[buf])104lxvw4x(50, %[cc2], %[buf])105lxvw4x(51, %[cc3], %[buf])106#if BR_POWER8_LE107vperm(16, 16, 16, 15)108vperm(17, 17, 17, 15)109vperm(18, 18, 18, 15)110vperm(19, 19, 19, 15)111#endif112vand(20, 16, 16)113vand(21, 17, 17)114vand(22, 18, 18)115vand(23, 19, 19)116117/*118* Decrypt the blocks.119*/120vxor(16, 16, 10)121vxor(17, 17, 10)122vxor(18, 18, 10)123vxor(19, 19, 10)124vncipher(16, 16, 9)125vncipher(17, 17, 9)126vncipher(18, 18, 9)127vncipher(19, 19, 9)128vncipher(16, 16, 8)129vncipher(17, 17, 8)130vncipher(18, 18, 8)131vncipher(19, 19, 8)132vncipher(16, 16, 7)133vncipher(17, 17, 7)134vncipher(18, 18, 7)135vncipher(19, 19, 7)136vncipher(16, 16, 6)137vncipher(17, 17, 6)138vncipher(18, 18, 6)139vncipher(19, 19, 6)140vncipher(16, 16, 5)141vncipher(17, 17, 5)142vncipher(18, 18, 5)143vncipher(19, 19, 5)144vncipher(16, 16, 4)145vncipher(17, 17, 4)146vncipher(18, 18, 4)147vncipher(19, 19, 4)148vncipher(16, 16, 3)149vncipher(17, 17, 3)150vncipher(18, 18, 3)151vncipher(19, 19, 3)152vncipher(16, 16, 2)153vncipher(17, 17, 2)154vncipher(18, 18, 2)155vncipher(19, 19, 2)156vncipher(16, 16, 1)157vncipher(17, 17, 1)158vncipher(18, 18, 1)159vncipher(19, 19, 1)160vncipherlast(16, 16, 0)161vncipherlast(17, 17, 
0)162vncipherlast(18, 18, 0)163vncipherlast(19, 19, 0)164165/*166* XOR decrypted blocks with IV / previous block.167*/168vxor(16, 16, 24)169vxor(17, 17, 20)170vxor(18, 18, 21)171vxor(19, 19, 22)172173/*174* Store back result (with byteswap)175*/176#if BR_POWER8_LE177vperm(16, 16, 16, 15)178vperm(17, 17, 17, 15)179vperm(18, 18, 18, 15)180vperm(19, 19, 19, 15)181#endif182stxvw4x(48, %[cc0], %[buf])183stxvw4x(49, %[cc1], %[buf])184stxvw4x(50, %[cc2], %[buf])185stxvw4x(51, %[cc3], %[buf])186187/*188* Fourth encrypted block is IV for next run.189*/190vand(24, 23, 23)191192addi(%[buf], %[buf], 64)193194bdnz(loop)195196: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),197[buf] "+b" (buf)198: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)199#if BR_POWER8_LE200, [idx2be] "b" (idx2be)201#endif202: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",203"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",204"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",205"ctr", "memory"206);207}208209static void210cbcdec_192(const unsigned char *sk,211const unsigned char *iv, unsigned char *buf, size_t num_blocks)212{213long cc0, cc1, cc2, cc3;214215#if BR_POWER8_LE216static const uint32_t idx2be[] = {2170x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C218};219#endif220221cc0 = 0;222cc1 = 16;223cc2 = 32;224cc3 = 48;225asm volatile (226227/*228* Load subkeys into v0..v12229*/230lxvw4x(32, %[cc0], %[sk])231addi(%[cc0], %[cc0], 16)232lxvw4x(33, %[cc0], %[sk])233addi(%[cc0], %[cc0], 16)234lxvw4x(34, %[cc0], %[sk])235addi(%[cc0], %[cc0], 16)236lxvw4x(35, %[cc0], %[sk])237addi(%[cc0], %[cc0], 16)238lxvw4x(36, %[cc0], %[sk])239addi(%[cc0], %[cc0], 16)240lxvw4x(37, %[cc0], %[sk])241addi(%[cc0], %[cc0], 16)242lxvw4x(38, %[cc0], %[sk])243addi(%[cc0], %[cc0], 16)244lxvw4x(39, %[cc0], %[sk])245addi(%[cc0], %[cc0], 16)246lxvw4x(40, %[cc0], %[sk])247addi(%[cc0], %[cc0], 16)248lxvw4x(41, %[cc0], %[sk])249addi(%[cc0], 
%[cc0], 16)250lxvw4x(42, %[cc0], %[sk])251addi(%[cc0], %[cc0], 16)252lxvw4x(43, %[cc0], %[sk])253addi(%[cc0], %[cc0], 16)254lxvw4x(44, %[cc0], %[sk])255li(%[cc0], 0)256257#if BR_POWER8_LE258/*259* v15 = constant for byteswapping words260*/261lxvw4x(47, 0, %[idx2be])262#endif263/*264* Load IV into v24.265*/266lxvw4x(56, 0, %[iv])267#if BR_POWER8_LE268vperm(24, 24, 24, 15)269#endif270271mtctr(%[num_blocks])272label(loop)273/*274* Load next ciphertext words in v16..v19. Also save them275* in v20..v23.276*/277lxvw4x(48, %[cc0], %[buf])278lxvw4x(49, %[cc1], %[buf])279lxvw4x(50, %[cc2], %[buf])280lxvw4x(51, %[cc3], %[buf])281#if BR_POWER8_LE282vperm(16, 16, 16, 15)283vperm(17, 17, 17, 15)284vperm(18, 18, 18, 15)285vperm(19, 19, 19, 15)286#endif287vand(20, 16, 16)288vand(21, 17, 17)289vand(22, 18, 18)290vand(23, 19, 19)291292/*293* Decrypt the blocks.294*/295vxor(16, 16, 12)296vxor(17, 17, 12)297vxor(18, 18, 12)298vxor(19, 19, 12)299vncipher(16, 16, 11)300vncipher(17, 17, 11)301vncipher(18, 18, 11)302vncipher(19, 19, 11)303vncipher(16, 16, 10)304vncipher(17, 17, 10)305vncipher(18, 18, 10)306vncipher(19, 19, 10)307vncipher(16, 16, 9)308vncipher(17, 17, 9)309vncipher(18, 18, 9)310vncipher(19, 19, 9)311vncipher(16, 16, 8)312vncipher(17, 17, 8)313vncipher(18, 18, 8)314vncipher(19, 19, 8)315vncipher(16, 16, 7)316vncipher(17, 17, 7)317vncipher(18, 18, 7)318vncipher(19, 19, 7)319vncipher(16, 16, 6)320vncipher(17, 17, 6)321vncipher(18, 18, 6)322vncipher(19, 19, 6)323vncipher(16, 16, 5)324vncipher(17, 17, 5)325vncipher(18, 18, 5)326vncipher(19, 19, 5)327vncipher(16, 16, 4)328vncipher(17, 17, 4)329vncipher(18, 18, 4)330vncipher(19, 19, 4)331vncipher(16, 16, 3)332vncipher(17, 17, 3)333vncipher(18, 18, 3)334vncipher(19, 19, 3)335vncipher(16, 16, 2)336vncipher(17, 17, 2)337vncipher(18, 18, 2)338vncipher(19, 19, 2)339vncipher(16, 16, 1)340vncipher(17, 17, 1)341vncipher(18, 18, 1)342vncipher(19, 19, 1)343vncipherlast(16, 16, 0)344vncipherlast(17, 17, 0)345vncipherlast(18, 18, 
0)346vncipherlast(19, 19, 0)347348/*349* XOR decrypted blocks with IV / previous block.350*/351vxor(16, 16, 24)352vxor(17, 17, 20)353vxor(18, 18, 21)354vxor(19, 19, 22)355356/*357* Store back result (with byteswap)358*/359#if BR_POWER8_LE360vperm(16, 16, 16, 15)361vperm(17, 17, 17, 15)362vperm(18, 18, 18, 15)363vperm(19, 19, 19, 15)364#endif365stxvw4x(48, %[cc0], %[buf])366stxvw4x(49, %[cc1], %[buf])367stxvw4x(50, %[cc2], %[buf])368stxvw4x(51, %[cc3], %[buf])369370/*371* Fourth encrypted block is IV for next run.372*/373vand(24, 23, 23)374375addi(%[buf], %[buf], 64)376377bdnz(loop)378379: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),380[buf] "+b" (buf)381: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)382#if BR_POWER8_LE383, [idx2be] "b" (idx2be)384#endif385: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",386"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",387"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",388"ctr", "memory"389);390}391392static void393cbcdec_256(const unsigned char *sk,394const unsigned char *iv, unsigned char *buf, size_t num_blocks)395{396long cc0, cc1, cc2, cc3;397398#if BR_POWER8_LE399static const uint32_t idx2be[] = {4000x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C401};402#endif403404cc0 = 0;405cc1 = 16;406cc2 = 32;407cc3 = 48;408asm volatile (409410/*411* Load subkeys into v0..v14412*/413lxvw4x(32, %[cc0], %[sk])414addi(%[cc0], %[cc0], 16)415lxvw4x(33, %[cc0], %[sk])416addi(%[cc0], %[cc0], 16)417lxvw4x(34, %[cc0], %[sk])418addi(%[cc0], %[cc0], 16)419lxvw4x(35, %[cc0], %[sk])420addi(%[cc0], %[cc0], 16)421lxvw4x(36, %[cc0], %[sk])422addi(%[cc0], %[cc0], 16)423lxvw4x(37, %[cc0], %[sk])424addi(%[cc0], %[cc0], 16)425lxvw4x(38, %[cc0], %[sk])426addi(%[cc0], %[cc0], 16)427lxvw4x(39, %[cc0], %[sk])428addi(%[cc0], %[cc0], 16)429lxvw4x(40, %[cc0], %[sk])430addi(%[cc0], %[cc0], 16)431lxvw4x(41, %[cc0], %[sk])432addi(%[cc0], %[cc0], 16)433lxvw4x(42, %[cc0], 
%[sk])434addi(%[cc0], %[cc0], 16)435lxvw4x(43, %[cc0], %[sk])436addi(%[cc0], %[cc0], 16)437lxvw4x(44, %[cc0], %[sk])438addi(%[cc0], %[cc0], 16)439lxvw4x(45, %[cc0], %[sk])440addi(%[cc0], %[cc0], 16)441lxvw4x(46, %[cc0], %[sk])442li(%[cc0], 0)443444#if BR_POWER8_LE445/*446* v15 = constant for byteswapping words447*/448lxvw4x(47, 0, %[idx2be])449#endif450/*451* Load IV into v24.452*/453lxvw4x(56, 0, %[iv])454#if BR_POWER8_LE455vperm(24, 24, 24, 15)456#endif457458mtctr(%[num_blocks])459label(loop)460/*461* Load next ciphertext words in v16..v19. Also save them462* in v20..v23.463*/464lxvw4x(48, %[cc0], %[buf])465lxvw4x(49, %[cc1], %[buf])466lxvw4x(50, %[cc2], %[buf])467lxvw4x(51, %[cc3], %[buf])468#if BR_POWER8_LE469vperm(16, 16, 16, 15)470vperm(17, 17, 17, 15)471vperm(18, 18, 18, 15)472vperm(19, 19, 19, 15)473#endif474vand(20, 16, 16)475vand(21, 17, 17)476vand(22, 18, 18)477vand(23, 19, 19)478479/*480* Decrypt the blocks.481*/482vxor(16, 16, 14)483vxor(17, 17, 14)484vxor(18, 18, 14)485vxor(19, 19, 14)486vncipher(16, 16, 13)487vncipher(17, 17, 13)488vncipher(18, 18, 13)489vncipher(19, 19, 13)490vncipher(16, 16, 12)491vncipher(17, 17, 12)492vncipher(18, 18, 12)493vncipher(19, 19, 12)494vncipher(16, 16, 11)495vncipher(17, 17, 11)496vncipher(18, 18, 11)497vncipher(19, 19, 11)498vncipher(16, 16, 10)499vncipher(17, 17, 10)500vncipher(18, 18, 10)501vncipher(19, 19, 10)502vncipher(16, 16, 9)503vncipher(17, 17, 9)504vncipher(18, 18, 9)505vncipher(19, 19, 9)506vncipher(16, 16, 8)507vncipher(17, 17, 8)508vncipher(18, 18, 8)509vncipher(19, 19, 8)510vncipher(16, 16, 7)511vncipher(17, 17, 7)512vncipher(18, 18, 7)513vncipher(19, 19, 7)514vncipher(16, 16, 6)515vncipher(17, 17, 6)516vncipher(18, 18, 6)517vncipher(19, 19, 6)518vncipher(16, 16, 5)519vncipher(17, 17, 5)520vncipher(18, 18, 5)521vncipher(19, 19, 5)522vncipher(16, 16, 4)523vncipher(17, 17, 4)524vncipher(18, 18, 4)525vncipher(19, 19, 4)526vncipher(16, 16, 3)527vncipher(17, 17, 3)528vncipher(18, 18, 3)529vncipher(19, 19, 
3)530vncipher(16, 16, 2)531vncipher(17, 17, 2)532vncipher(18, 18, 2)533vncipher(19, 19, 2)534vncipher(16, 16, 1)535vncipher(17, 17, 1)536vncipher(18, 18, 1)537vncipher(19, 19, 1)538vncipherlast(16, 16, 0)539vncipherlast(17, 17, 0)540vncipherlast(18, 18, 0)541vncipherlast(19, 19, 0)542543/*544* XOR decrypted blocks with IV / previous block.545*/546vxor(16, 16, 24)547vxor(17, 17, 20)548vxor(18, 18, 21)549vxor(19, 19, 22)550551/*552* Store back result (with byteswap)553*/554#if BR_POWER8_LE555vperm(16, 16, 16, 15)556vperm(17, 17, 17, 15)557vperm(18, 18, 18, 15)558vperm(19, 19, 19, 15)559#endif560stxvw4x(48, %[cc0], %[buf])561stxvw4x(49, %[cc1], %[buf])562stxvw4x(50, %[cc2], %[buf])563stxvw4x(51, %[cc3], %[buf])564565/*566* Fourth encrypted block is IV for next run.567*/568vand(24, 23, 23)569570addi(%[buf], %[buf], 64)571572bdnz(loop)573574: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),575[buf] "+b" (buf)576: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)577#if BR_POWER8_LE578, [idx2be] "b" (idx2be)579#endif580: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",581"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",582"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",583"ctr", "memory"584);585}586587/* see bearssl_block.h */588void589br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,590void *iv, void *data, size_t len)591{592unsigned char nextiv[16];593unsigned char *buf;594595if (len == 0) {596return;597}598buf = data;599memcpy(nextiv, buf + len - 16, 16);600if (len >= 64) {601size_t num_blocks;602unsigned char tmp[16];603604num_blocks = (len >> 4) & ~(size_t)3;605memcpy(tmp, buf + (num_blocks << 4) - 16, 16);606switch (ctx->num_rounds) {607case 10:608cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);609break;610case 12:611cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);612break;613default:614cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);615break;616}617buf += num_blocks 
<< 4;618len &= 63;619memcpy(iv, tmp, 16);620}621if (len > 0) {622unsigned char tmp[64];623624memcpy(tmp, buf, len);625memset(tmp + len, 0, (sizeof tmp) - len);626switch (ctx->num_rounds) {627case 10:628cbcdec_128(ctx->skey.skni, iv, tmp, 4);629break;630case 12:631cbcdec_192(ctx->skey.skni, iv, tmp, 4);632break;633default:634cbcdec_256(ctx->skey.skni, iv, tmp, 4);635break;636}637memcpy(buf, tmp, len);638}639memcpy(iv, nextiv, 16);640}641642/* see bearssl_block.h */643const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {644sizeof(br_aes_pwr8_cbcdec_keys),64516,6464,647(void (*)(const br_block_cbcdec_class **, const void *, size_t))648&br_aes_pwr8_cbcdec_init,649(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))650&br_aes_pwr8_cbcdec_run651};652653/* see bearssl_block.h */654const br_block_cbcdec_class *655br_aes_pwr8_cbcdec_get_vtable(void)656{657return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable : NULL;658}659660#else661662/* see bearssl_block.h */663const br_block_cbcdec_class *664br_aes_pwr8_cbcdec_get_vtable(void)665{666return NULL;667}668669#endif670671672