/* Path: blob/main/contrib/bearssl/src/symcipher/aes_pwr8_ctr.c */
/* 39482 views */
/*1* Copyright (c) 2017 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#define BR_POWER_ASM_MACROS 125#include "inner.h"2627#if BR_POWER82829/* see bearssl_block.h */30void31br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,32const void *key, size_t len)33{34ctx->vtable = &br_aes_pwr8_ctr_vtable;35ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);36}3738static void39ctr_128(const unsigned char *sk, const unsigned char *ivbuf,40unsigned char *buf, size_t num_blocks)41{42long cc0, cc1, cc2, cc3;4344#if BR_POWER8_LE45static const uint32_t idx2be[] = {460x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C47};48#endif49static const uint32_t ctrinc[] = {500, 0, 0, 451};5253cc0 = 0;54cc1 = 16;55cc2 = 32;56cc3 = 48;57asm volatile (5859/*60* Load subkeys into v0..v1061*/62lxvw4x(32, %[cc0], %[sk])63addi(%[cc0], %[cc0], 16)64lxvw4x(33, %[cc0], %[sk])65addi(%[cc0], %[cc0], 16)66lxvw4x(34, %[cc0], %[sk])67addi(%[cc0], 
%[cc0], 16)68lxvw4x(35, %[cc0], %[sk])69addi(%[cc0], %[cc0], 16)70lxvw4x(36, %[cc0], %[sk])71addi(%[cc0], %[cc0], 16)72lxvw4x(37, %[cc0], %[sk])73addi(%[cc0], %[cc0], 16)74lxvw4x(38, %[cc0], %[sk])75addi(%[cc0], %[cc0], 16)76lxvw4x(39, %[cc0], %[sk])77addi(%[cc0], %[cc0], 16)78lxvw4x(40, %[cc0], %[sk])79addi(%[cc0], %[cc0], 16)80lxvw4x(41, %[cc0], %[sk])81addi(%[cc0], %[cc0], 16)82lxvw4x(42, %[cc0], %[sk])83li(%[cc0], 0)8485#if BR_POWER8_LE86/*87* v15 = constant for byteswapping words88*/89lxvw4x(47, 0, %[idx2be])90#endif91/*92* v28 = increment for IV counter.93*/94lxvw4x(60, 0, %[ctrinc])9596/*97* Load IV into v16..v1998*/99lxvw4x(48, %[cc0], %[ivbuf])100lxvw4x(49, %[cc1], %[ivbuf])101lxvw4x(50, %[cc2], %[ivbuf])102lxvw4x(51, %[cc3], %[ivbuf])103#if BR_POWER8_LE104vperm(16, 16, 16, 15)105vperm(17, 17, 17, 15)106vperm(18, 18, 18, 15)107vperm(19, 19, 19, 15)108#endif109110mtctr(%[num_blocks])111label(loop)112/*113* Compute next IV into v24..v27114*/115vadduwm(24, 16, 28)116vadduwm(25, 17, 28)117vadduwm(26, 18, 28)118vadduwm(27, 19, 28)119120/*121* Load next data blocks. 
We do this early on but we122* won't need them until IV encryption is done.123*/124lxvw4x(52, %[cc0], %[buf])125lxvw4x(53, %[cc1], %[buf])126lxvw4x(54, %[cc2], %[buf])127lxvw4x(55, %[cc3], %[buf])128129/*130* Encrypt the current IV.131*/132vxor(16, 16, 0)133vxor(17, 17, 0)134vxor(18, 18, 0)135vxor(19, 19, 0)136vcipher(16, 16, 1)137vcipher(17, 17, 1)138vcipher(18, 18, 1)139vcipher(19, 19, 1)140vcipher(16, 16, 2)141vcipher(17, 17, 2)142vcipher(18, 18, 2)143vcipher(19, 19, 2)144vcipher(16, 16, 3)145vcipher(17, 17, 3)146vcipher(18, 18, 3)147vcipher(19, 19, 3)148vcipher(16, 16, 4)149vcipher(17, 17, 4)150vcipher(18, 18, 4)151vcipher(19, 19, 4)152vcipher(16, 16, 5)153vcipher(17, 17, 5)154vcipher(18, 18, 5)155vcipher(19, 19, 5)156vcipher(16, 16, 6)157vcipher(17, 17, 6)158vcipher(18, 18, 6)159vcipher(19, 19, 6)160vcipher(16, 16, 7)161vcipher(17, 17, 7)162vcipher(18, 18, 7)163vcipher(19, 19, 7)164vcipher(16, 16, 8)165vcipher(17, 17, 8)166vcipher(18, 18, 8)167vcipher(19, 19, 8)168vcipher(16, 16, 9)169vcipher(17, 17, 9)170vcipher(18, 18, 9)171vcipher(19, 19, 9)172vcipherlast(16, 16, 10)173vcipherlast(17, 17, 10)174vcipherlast(18, 18, 10)175vcipherlast(19, 19, 10)176177#if BR_POWER8_LE178vperm(16, 16, 16, 15)179vperm(17, 17, 17, 15)180vperm(18, 18, 18, 15)181vperm(19, 19, 19, 15)182#endif183184/*185* Load next plaintext word and XOR with encrypted IV.186*/187vxor(16, 20, 16)188vxor(17, 21, 17)189vxor(18, 22, 18)190vxor(19, 23, 19)191stxvw4x(48, %[cc0], %[buf])192stxvw4x(49, %[cc1], %[buf])193stxvw4x(50, %[cc2], %[buf])194stxvw4x(51, %[cc3], %[buf])195196addi(%[buf], %[buf], 64)197198/*199* Update IV.200*/201vand(16, 24, 24)202vand(17, 25, 25)203vand(18, 26, 26)204vand(19, 27, 27)205206bdnz(loop)207208: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),209[buf] "+b" (buf)210: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),211[ctrinc] "b" (ctrinc)212#if BR_POWER8_LE213, [idx2be] "b" (idx2be)214#endif215: "v0", "v1", "v2", "v3", "v4", 
"v5", "v6", "v7", "v8", "v9",216"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",217"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",218"ctr", "memory"219);220}221222static void223ctr_192(const unsigned char *sk, const unsigned char *ivbuf,224unsigned char *buf, size_t num_blocks)225{226long cc0, cc1, cc2, cc3;227228#if BR_POWER8_LE229static const uint32_t idx2be[] = {2300x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C231};232#endif233static const uint32_t ctrinc[] = {2340, 0, 0, 4235};236237cc0 = 0;238cc1 = 16;239cc2 = 32;240cc3 = 48;241asm volatile (242243/*244* Load subkeys into v0..v12245*/246lxvw4x(32, %[cc0], %[sk])247addi(%[cc0], %[cc0], 16)248lxvw4x(33, %[cc0], %[sk])249addi(%[cc0], %[cc0], 16)250lxvw4x(34, %[cc0], %[sk])251addi(%[cc0], %[cc0], 16)252lxvw4x(35, %[cc0], %[sk])253addi(%[cc0], %[cc0], 16)254lxvw4x(36, %[cc0], %[sk])255addi(%[cc0], %[cc0], 16)256lxvw4x(37, %[cc0], %[sk])257addi(%[cc0], %[cc0], 16)258lxvw4x(38, %[cc0], %[sk])259addi(%[cc0], %[cc0], 16)260lxvw4x(39, %[cc0], %[sk])261addi(%[cc0], %[cc0], 16)262lxvw4x(40, %[cc0], %[sk])263addi(%[cc0], %[cc0], 16)264lxvw4x(41, %[cc0], %[sk])265addi(%[cc0], %[cc0], 16)266lxvw4x(42, %[cc0], %[sk])267addi(%[cc0], %[cc0], 16)268lxvw4x(43, %[cc0], %[sk])269addi(%[cc0], %[cc0], 16)270lxvw4x(44, %[cc0], %[sk])271li(%[cc0], 0)272273#if BR_POWER8_LE274/*275* v15 = constant for byteswapping words276*/277lxvw4x(47, 0, %[idx2be])278#endif279/*280* v28 = increment for IV counter.281*/282lxvw4x(60, 0, %[ctrinc])283284/*285* Load IV into v16..v19286*/287lxvw4x(48, %[cc0], %[ivbuf])288lxvw4x(49, %[cc1], %[ivbuf])289lxvw4x(50, %[cc2], %[ivbuf])290lxvw4x(51, %[cc3], %[ivbuf])291#if BR_POWER8_LE292vperm(16, 16, 16, 15)293vperm(17, 17, 17, 15)294vperm(18, 18, 18, 15)295vperm(19, 19, 19, 15)296#endif297298mtctr(%[num_blocks])299label(loop)300/*301* Compute next IV into v24..v27302*/303vadduwm(24, 16, 28)304vadduwm(25, 17, 28)305vadduwm(26, 18, 28)306vadduwm(27, 19, 28)307308/*309* 
Load next data blocks. We do this early on but we310* won't need them until IV encryption is done.311*/312lxvw4x(52, %[cc0], %[buf])313lxvw4x(53, %[cc1], %[buf])314lxvw4x(54, %[cc2], %[buf])315lxvw4x(55, %[cc3], %[buf])316317/*318* Encrypt the current IV.319*/320vxor(16, 16, 0)321vxor(17, 17, 0)322vxor(18, 18, 0)323vxor(19, 19, 0)324vcipher(16, 16, 1)325vcipher(17, 17, 1)326vcipher(18, 18, 1)327vcipher(19, 19, 1)328vcipher(16, 16, 2)329vcipher(17, 17, 2)330vcipher(18, 18, 2)331vcipher(19, 19, 2)332vcipher(16, 16, 3)333vcipher(17, 17, 3)334vcipher(18, 18, 3)335vcipher(19, 19, 3)336vcipher(16, 16, 4)337vcipher(17, 17, 4)338vcipher(18, 18, 4)339vcipher(19, 19, 4)340vcipher(16, 16, 5)341vcipher(17, 17, 5)342vcipher(18, 18, 5)343vcipher(19, 19, 5)344vcipher(16, 16, 6)345vcipher(17, 17, 6)346vcipher(18, 18, 6)347vcipher(19, 19, 6)348vcipher(16, 16, 7)349vcipher(17, 17, 7)350vcipher(18, 18, 7)351vcipher(19, 19, 7)352vcipher(16, 16, 8)353vcipher(17, 17, 8)354vcipher(18, 18, 8)355vcipher(19, 19, 8)356vcipher(16, 16, 9)357vcipher(17, 17, 9)358vcipher(18, 18, 9)359vcipher(19, 19, 9)360vcipher(16, 16, 10)361vcipher(17, 17, 10)362vcipher(18, 18, 10)363vcipher(19, 19, 10)364vcipher(16, 16, 11)365vcipher(17, 17, 11)366vcipher(18, 18, 11)367vcipher(19, 19, 11)368vcipherlast(16, 16, 12)369vcipherlast(17, 17, 12)370vcipherlast(18, 18, 12)371vcipherlast(19, 19, 12)372373#if BR_POWER8_LE374vperm(16, 16, 16, 15)375vperm(17, 17, 17, 15)376vperm(18, 18, 18, 15)377vperm(19, 19, 19, 15)378#endif379380/*381* Load next plaintext word and XOR with encrypted IV.382*/383vxor(16, 20, 16)384vxor(17, 21, 17)385vxor(18, 22, 18)386vxor(19, 23, 19)387stxvw4x(48, %[cc0], %[buf])388stxvw4x(49, %[cc1], %[buf])389stxvw4x(50, %[cc2], %[buf])390stxvw4x(51, %[cc3], %[buf])391392addi(%[buf], %[buf], 64)393394/*395* Update IV.396*/397vand(16, 24, 24)398vand(17, 25, 25)399vand(18, 26, 26)400vand(19, 27, 27)401402bdnz(loop)403404: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),405[buf] 
"+b" (buf)406: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),407[ctrinc] "b" (ctrinc)408#if BR_POWER8_LE409, [idx2be] "b" (idx2be)410#endif411: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",412"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",413"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",414"ctr", "memory"415);416}417418static void419ctr_256(const unsigned char *sk, const unsigned char *ivbuf,420unsigned char *buf, size_t num_blocks)421{422long cc0, cc1, cc2, cc3;423424#if BR_POWER8_LE425static const uint32_t idx2be[] = {4260x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C427};428#endif429static const uint32_t ctrinc[] = {4300, 0, 0, 4431};432433cc0 = 0;434cc1 = 16;435cc2 = 32;436cc3 = 48;437asm volatile (438439/*440* Load subkeys into v0..v14441*/442lxvw4x(32, %[cc0], %[sk])443addi(%[cc0], %[cc0], 16)444lxvw4x(33, %[cc0], %[sk])445addi(%[cc0], %[cc0], 16)446lxvw4x(34, %[cc0], %[sk])447addi(%[cc0], %[cc0], 16)448lxvw4x(35, %[cc0], %[sk])449addi(%[cc0], %[cc0], 16)450lxvw4x(36, %[cc0], %[sk])451addi(%[cc0], %[cc0], 16)452lxvw4x(37, %[cc0], %[sk])453addi(%[cc0], %[cc0], 16)454lxvw4x(38, %[cc0], %[sk])455addi(%[cc0], %[cc0], 16)456lxvw4x(39, %[cc0], %[sk])457addi(%[cc0], %[cc0], 16)458lxvw4x(40, %[cc0], %[sk])459addi(%[cc0], %[cc0], 16)460lxvw4x(41, %[cc0], %[sk])461addi(%[cc0], %[cc0], 16)462lxvw4x(42, %[cc0], %[sk])463addi(%[cc0], %[cc0], 16)464lxvw4x(43, %[cc0], %[sk])465addi(%[cc0], %[cc0], 16)466lxvw4x(44, %[cc0], %[sk])467addi(%[cc0], %[cc0], 16)468lxvw4x(45, %[cc0], %[sk])469addi(%[cc0], %[cc0], 16)470lxvw4x(46, %[cc0], %[sk])471li(%[cc0], 0)472473#if BR_POWER8_LE474/*475* v15 = constant for byteswapping words476*/477lxvw4x(47, 0, %[idx2be])478#endif479/*480* v28 = increment for IV counter.481*/482lxvw4x(60, 0, %[ctrinc])483484/*485* Load IV into v16..v19486*/487lxvw4x(48, %[cc0], %[ivbuf])488lxvw4x(49, %[cc1], %[ivbuf])489lxvw4x(50, %[cc2], %[ivbuf])490lxvw4x(51, %[cc3], 
%[ivbuf])491#if BR_POWER8_LE492vperm(16, 16, 16, 15)493vperm(17, 17, 17, 15)494vperm(18, 18, 18, 15)495vperm(19, 19, 19, 15)496#endif497498mtctr(%[num_blocks])499label(loop)500/*501* Compute next IV into v24..v27502*/503vadduwm(24, 16, 28)504vadduwm(25, 17, 28)505vadduwm(26, 18, 28)506vadduwm(27, 19, 28)507508/*509* Load next data blocks. We do this early on but we510* won't need them until IV encryption is done.511*/512lxvw4x(52, %[cc0], %[buf])513lxvw4x(53, %[cc1], %[buf])514lxvw4x(54, %[cc2], %[buf])515lxvw4x(55, %[cc3], %[buf])516517/*518* Encrypt the current IV.519*/520vxor(16, 16, 0)521vxor(17, 17, 0)522vxor(18, 18, 0)523vxor(19, 19, 0)524vcipher(16, 16, 1)525vcipher(17, 17, 1)526vcipher(18, 18, 1)527vcipher(19, 19, 1)528vcipher(16, 16, 2)529vcipher(17, 17, 2)530vcipher(18, 18, 2)531vcipher(19, 19, 2)532vcipher(16, 16, 3)533vcipher(17, 17, 3)534vcipher(18, 18, 3)535vcipher(19, 19, 3)536vcipher(16, 16, 4)537vcipher(17, 17, 4)538vcipher(18, 18, 4)539vcipher(19, 19, 4)540vcipher(16, 16, 5)541vcipher(17, 17, 5)542vcipher(18, 18, 5)543vcipher(19, 19, 5)544vcipher(16, 16, 6)545vcipher(17, 17, 6)546vcipher(18, 18, 6)547vcipher(19, 19, 6)548vcipher(16, 16, 7)549vcipher(17, 17, 7)550vcipher(18, 18, 7)551vcipher(19, 19, 7)552vcipher(16, 16, 8)553vcipher(17, 17, 8)554vcipher(18, 18, 8)555vcipher(19, 19, 8)556vcipher(16, 16, 9)557vcipher(17, 17, 9)558vcipher(18, 18, 9)559vcipher(19, 19, 9)560vcipher(16, 16, 10)561vcipher(17, 17, 10)562vcipher(18, 18, 10)563vcipher(19, 19, 10)564vcipher(16, 16, 11)565vcipher(17, 17, 11)566vcipher(18, 18, 11)567vcipher(19, 19, 11)568vcipher(16, 16, 12)569vcipher(17, 17, 12)570vcipher(18, 18, 12)571vcipher(19, 19, 12)572vcipher(16, 16, 13)573vcipher(17, 17, 13)574vcipher(18, 18, 13)575vcipher(19, 19, 13)576vcipherlast(16, 16, 14)577vcipherlast(17, 17, 14)578vcipherlast(18, 18, 14)579vcipherlast(19, 19, 14)580581#if BR_POWER8_LE582vperm(16, 16, 16, 15)583vperm(17, 17, 17, 15)584vperm(18, 18, 18, 15)585vperm(19, 19, 19, 
15)586#endif587588/*589* Load next plaintext word and XOR with encrypted IV.590*/591vxor(16, 20, 16)592vxor(17, 21, 17)593vxor(18, 22, 18)594vxor(19, 23, 19)595stxvw4x(48, %[cc0], %[buf])596stxvw4x(49, %[cc1], %[buf])597stxvw4x(50, %[cc2], %[buf])598stxvw4x(51, %[cc3], %[buf])599600addi(%[buf], %[buf], 64)601602/*603* Update IV.604*/605vand(16, 24, 24)606vand(17, 25, 25)607vand(18, 26, 26)608vand(19, 27, 27)609610bdnz(loop)611612: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),613[buf] "+b" (buf)614: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),615[ctrinc] "b" (ctrinc)616#if BR_POWER8_LE617, [idx2be] "b" (idx2be)618#endif619: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",620"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",621"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",622"ctr", "memory"623);624}625626/* see bearssl_block.h */627uint32_t628br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,629const void *iv, uint32_t cc, void *data, size_t len)630{631unsigned char *buf;632unsigned char ivbuf[64];633634buf = data;635memcpy(ivbuf + 0, iv, 12);636memcpy(ivbuf + 16, iv, 12);637memcpy(ivbuf + 32, iv, 12);638memcpy(ivbuf + 48, iv, 12);639if (len >= 64) {640br_enc32be(ivbuf + 12, cc + 0);641br_enc32be(ivbuf + 28, cc + 1);642br_enc32be(ivbuf + 44, cc + 2);643br_enc32be(ivbuf + 60, cc + 3);644switch (ctx->num_rounds) {645case 10:646ctr_128(ctx->skey.skni, ivbuf, buf,647(len >> 4) & ~(size_t)3);648break;649case 12:650ctr_192(ctx->skey.skni, ivbuf, buf,651(len >> 4) & ~(size_t)3);652break;653default:654ctr_256(ctx->skey.skni, ivbuf, buf,655(len >> 4) & ~(size_t)3);656break;657}658cc += (len >> 4) & ~(size_t)3;659buf += len & ~(size_t)63;660len &= 63;661}662if (len > 0) {663unsigned char tmp[64];664665memcpy(tmp, buf, len);666memset(tmp + len, 0, (sizeof tmp) - len);667br_enc32be(ivbuf + 12, cc + 0);668br_enc32be(ivbuf + 28, cc + 1);669br_enc32be(ivbuf + 44, cc + 
2);670br_enc32be(ivbuf + 60, cc + 3);671switch (ctx->num_rounds) {672case 10:673ctr_128(ctx->skey.skni, ivbuf, tmp, 4);674break;675case 12:676ctr_192(ctx->skey.skni, ivbuf, tmp, 4);677break;678default:679ctr_256(ctx->skey.skni, ivbuf, tmp, 4);680break;681}682memcpy(buf, tmp, len);683cc += (len + 15) >> 4;684}685return cc;686}687688/* see bearssl_block.h */689const br_block_ctr_class br_aes_pwr8_ctr_vtable = {690sizeof(br_aes_pwr8_ctr_keys),69116,6924,693(void (*)(const br_block_ctr_class **, const void *, size_t))694&br_aes_pwr8_ctr_init,695(uint32_t (*)(const br_block_ctr_class *const *,696const void *, uint32_t, void *, size_t))697&br_aes_pwr8_ctr_run698};699700/* see bearssl_block.h */701const br_block_ctr_class *702br_aes_pwr8_ctr_get_vtable(void)703{704return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;705}706707#else708709/* see bearssl_block.h */710const br_block_ctr_class *711br_aes_pwr8_ctr_get_vtable(void)712{713return NULL;714}715716#endif717718719