/* Path: contrib/bearssl/src/symcipher/aes_pwr8_ctrcbc.c */
/*
 * Copyright (c) 2018 Thomas Pornin <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * AES in CTR + CBC-MAC mode, accelerated with the POWER8 in-core
 * cryptography opcodes (vcipher / vcipherlast), accessed through the
 * asm macros enabled below (li, lxvw4x, vperm, ... are macros from
 * inner.h when BR_POWER_ASM_MACROS is set).
 */
#define BR_POWER_ASM_MACROS   1
#include "inner.h"

#if BR_POWER8

/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
	/* Runtime check: the opcodes exist only on POWER8+ cores. */
	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
}

/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
	const void *key, size_t len)
{
	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
	/* Key schedule returns 10/12/14 rounds for 16/24/32-byte keys. */
	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
}

/*
 * Register conventions for CTR + CBC-MAC:
 *
 * AES subkeys are in registers 0 to 10/12/14 (depending on key size)
 * Register v15 contains the byteswap index register (little-endian only)
 * Register v16 contains the CTR counter value
 * Register v17 contains the CBC-MAC current value
 * Registers v18 to v27 are scratch
 * Counter increment uses v28, v29 and v30
 *
 * For CTR alone:
 *
 * AES subkeys are in registers 0 to 10/12/14 (depending on key size)
 * Register v15 contains the byteswap index register (little-endian only)
 * Registers v16 to v19 contain the CTR counter values (four blocks)
 * Registers v20 to v27 are scratch
 * Counter increment uses v28, v29 and v30
 */

/*
 * Load the AES round keys from %[sk] into v0..v10 (VSX registers
 * 32..42). %[cc] is used as a running byte offset and is left at
 * 10*16 = 160 on exit; callers reset it with li() afterwards.
 */
#define LOAD_SUBKEYS_128 \
		lxvw4x(32, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(33, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(34, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(35, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(36, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(37, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(38, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(39, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(40, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(41, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(42, %[cc], %[sk])

/* Same, plus the two extra round keys of AES-192 (into v11..v12). */
#define LOAD_SUBKEYS_192 \
		LOAD_SUBKEYS_128 \
		addi(%[cc], %[cc], 16) \
		lxvw4x(43, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(44, %[cc], %[sk])

/* Same, plus the two extra round keys of AES-256 (into v13..v14). */
#define LOAD_SUBKEYS_256 \
		LOAD_SUBKEYS_192 \
		addi(%[cc], %[cc], 16) \
		lxvw4x(45, %[cc], %[sk]) \
		addi(%[cc], %[cc], 16) \
		lxvw4x(46, %[cc], %[sk])

/*
 * Full AES-128 encryption of the block in register x, in place
 * (initial AddRoundKey, 9 middle rounds, final round).
 */
#define BLOCK_ENCRYPT_128(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipherlast(x, x, 10)

/* Full AES-192 encryption of the block in register x, in place. */
#define BLOCK_ENCRYPT_192(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipher(x, x, 10) \
		vcipher(x, x, 11) \
		vcipherlast(x, x, 12)

/* Full AES-256 encryption of the block in register x, in place. */
#define BLOCK_ENCRYPT_256(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipher(x, x, 10) \
		vcipher(x, x, 11) \
		vcipher(x, x, 12) \
		vcipher(x, x, 13) \
		vcipherlast(x, x, 14)

/*
 * Two parallel AES-128 encryptions (x and y), with the rounds
 * interleaved so both AES pipelines progress concurrently.
 */
#define BLOCK_ENCRYPT_X2_128(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipherlast(x, x, 10) \
		vcipherlast(y, y, 10)

/* Two parallel interleaved AES-192 encryptions (x and y). */
#define BLOCK_ENCRYPT_X2_192(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipher(x, x, 10) \
		vcipher(y, y, 10) \
		vcipher(x, x, 11) \
		vcipher(y, y, 11) \
		vcipherlast(x, x, 12) \
		vcipherlast(y, y, 12)

/* Two parallel interleaved AES-256 encryptions (x and y). */
#define BLOCK_ENCRYPT_X2_256(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipher(x, x, 10) \
		vcipher(y, y, 10) \
		vcipher(x, x, 11) \
		vcipher(y, y, 11) \
		vcipher(x, x, 12) \
		vcipher(y, y, 12) \
		vcipher(x, x, 13) \
		vcipher(y, y, 13) \
		vcipherlast(x, x, 14) \
		vcipherlast(y, y, 14)

/* Four parallel interleaved AES-128 encryptions (x0..x3). */
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipherlast(x0, x0, 10) \
		vcipherlast(x1, x1, 10) \
		vcipherlast(x2, x2, 10) \
		vcipherlast(x3, x3, 10)

/* Four parallel interleaved AES-192 encryptions (x0..x3). */
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipher(x0, x0, 10) \
		vcipher(x1, x1, 10) \
		vcipher(x2, x2, 10) \
		vcipher(x3, x3, 10) \
		vcipher(x0, x0, 11) \
		vcipher(x1, x1, 11) \
		vcipher(x2, x2, 11) \
		vcipher(x3, x3, 11) \
		vcipherlast(x0, x0, 12) \
		vcipherlast(x1, x1, 12) \
		vcipherlast(x2, x2, 12) \
		vcipherlast(x3, x3, 12)

/* Four parallel interleaved AES-256 encryptions (x0..x3). */
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipher(x0, x0, 10) \
		vcipher(x1, x1, 10) \
		vcipher(x2, x2, 10) \
		vcipher(x3, x3, 10) \
		vcipher(x0, x0, 11) \
		vcipher(x1, x1, 11) \
		vcipher(x2, x2, 11) \
		vcipher(x3, x3, 11) \
		vcipher(x0, x0, 12) \
		vcipher(x1, x1, 12) \
		vcipher(x2, x2, 12) \
		vcipher(x3, x3, 12) \
		vcipher(x0, x0, 13) \
		vcipher(x1, x1, 13) \
		vcipher(x2, x2, 13) \
		vcipher(x3, x3, 13) \
		vcipherlast(x0, x0, 14) \
		vcipherlast(x1, x1, 14) \
		vcipherlast(x2, x2, 14) \
		vcipherlast(x3, x3, 14)

#if BR_POWER8_LE
/*
 * On little-endian, loaded blocks must be byte-swapped to big-endian
 * order before AES processing; v15 holds this vperm permutation index.
 */
static const uint32_t idx2be[] = {
	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be])
#define BYTESWAP(x)       vperm(x, x, x, 15)
#define BYTESWAPX(d, s)   vperm(d, s, s, 15)
#define BYTESWAP_REG      , [idx2be] "b" (idx2be)
#else
/* Big-endian: swaps are no-ops; BYTESWAPX degenerates to a move. */
#define BYTESWAP_INIT
#define BYTESWAP(x)
#define BYTESWAPX(d, s)   vand(d, s, s)
#define BYTESWAP_REG
#endif

/*
 * 128-bit big-endian counter increments: by 1 (single-block mode) or
 * by 4 (four-way CTR mode). Loaded into v28 by the INCR_*_INIT macros.
 */
static const uint32_t ctrinc[] = {
	0, 0, 0, 1
};
static const uint32_t ctrinc_x4[] = {
	0, 0, 0, 4
};
#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc])
#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4])

/*
 * d <- s + v28, as a 128-bit big-endian integer. vadduwm adds per
 * 32-bit word, vaddcuw extracts the word carries, and vsldoi shifts
 * the carries to the next more significant word; three propagation
 * steps cover the full 128 bits. Clobbers v29 and v30.
 */
#define INCR_128(d, s) \
		vaddcuw(29, s, 28) \
		vadduwm(d, s, 28) \
		vsldoi(30, 29, 29, 4) \
		vaddcuw(29, d, 30) \
		vadduwm(d, d, 30) \
		vsldoi(30, 29, 29, 4) \
		vaddcuw(29, d, 30) \
		vadduwm(d, d, 30) \
		vsldoi(30, 29, 29, 4) \
		vadduwm(d, d, 30)

/*
 * MKCTR(size) defines ctr_<size>(): pure CTR keystream application on
 * num_blocks_x4 groups of four 16-byte blocks. ctrbuf[] holds four
 * successive counter values; they are encrypted four-way, XORed into
 * buf, and incremented by 4 per iteration; the final counters are
 * stored back into ctrbuf.
 */
#define MKCTR(size) \
static void \
ctr_ ## size(const unsigned char *sk, \
	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
{ \
	long cc, cc0, cc1, cc2, cc3; \
	\
	cc = 0; \
	cc0 = 0; \
	cc1 = 16; \
	cc2 = 32; \
	cc3 = 48; \
	asm volatile ( \
		\
		/* \
		 * Load subkeys into v0..v10 (v0..v12/v14 for 192/256) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
		\
		BYTESWAP_INIT \
		INCR_128_X4_INIT \
		\
		/* \
		 * Load current CTR counters into v16 to v19. \
		 */ \
		lxvw4x(48, %[cc0], %[ctrbuf]) \
		lxvw4x(49, %[cc1], %[ctrbuf]) \
		lxvw4x(50, %[cc2], %[ctrbuf]) \
		lxvw4x(51, %[cc3], %[ctrbuf]) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
		\
		mtctr(%[num_blocks_x4]) \
		\
	label(loop) \
		/* \
		 * Compute next counter values into v20..v23. \
		 */ \
		INCR_128(20, 16) \
		INCR_128(21, 17) \
		INCR_128(22, 18) \
		INCR_128(23, 19) \
		\
		/* \
		 * Encrypt counter values and XOR into next data blocks. \
		 */ \
		lxvw4x(56, %[cc0], %[buf]) \
		lxvw4x(57, %[cc1], %[buf]) \
		lxvw4x(58, %[cc2], %[buf]) \
		lxvw4x(59, %[cc3], %[buf]) \
		BYTESWAP(24) \
		BYTESWAP(25) \
		BYTESWAP(26) \
		BYTESWAP(27) \
		BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
		vxor(16, 16, 24) \
		vxor(17, 17, 25) \
		vxor(18, 18, 26) \
		vxor(19, 19, 27) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
		stxvw4x(48, %[cc0], %[buf]) \
		stxvw4x(49, %[cc1], %[buf]) \
		stxvw4x(50, %[cc2], %[buf]) \
		stxvw4x(51, %[cc3], %[buf]) \
		\
		/* \
		 * Update counters and data pointer. \
		 */ \
		vand(16, 20, 20) \
		vand(17, 21, 21) \
		vand(18, 22, 22) \
		vand(19, 23, 23) \
		addi(%[buf], %[buf], 64) \
		\
		bdnz(loop) \
		\
		/* \
		 * Write back new counter values. \
		 */ \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
		stxvw4x(48, %[cc0], %[ctrbuf]) \
		stxvw4x(49, %[cc1], %[ctrbuf]) \
		stxvw4x(50, %[cc2], %[ctrbuf]) \
		stxvw4x(51, %[cc3], %[ctrbuf]) \
		\
: [cc] "+b" (cc), [buf] "+b" (buf), \
	[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
	[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
	"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
	"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
	"v30", "ctr", "memory" \
	); \
}

MKCTR(128)
MKCTR(192)
MKCTR(256)

/*
 * MKCBCMAC(size) defines cbcmac_<size>(): CBC-MAC only. For each of
 * num_blocks 16-byte blocks of buf, XOR into the running MAC value and
 * encrypt; the updated MAC is written back to cbcmac. Data is only
 * read, never modified.
 */
#define MKCBCMAC(size) \
static void \
cbcmac_ ## size(const unsigned char *sk, \
	unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
{ \
	long cc; \
	\
	cc = 0; \
	asm volatile ( \
		\
		/* \
		 * Load subkeys into v0..v10 (v0..v12/v14 for 192/256) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
		\
		BYTESWAP_INIT \
		\
		/* \
		 * Load current CBC-MAC value into v16. \
		 */ \
		lxvw4x(48, %[cc], %[cbcmac]) \
		BYTESWAP(16) \
		\
		mtctr(%[num_blocks]) \
		\
	label(loop) \
		/* \
		 * Load next block, XOR into current CBC-MAC value, \
		 * and then encrypt it. \
		 */ \
		lxvw4x(49, %[cc], %[buf]) \
		BYTESWAP(17) \
		vxor(16, 16, 17) \
		BLOCK_ENCRYPT_ ## size(16) \
		addi(%[buf], %[buf], 16) \
		\
		bdnz(loop) \
		\
		/* \
		 * Write back new CBC-MAC value. \
		 */ \
		BYTESWAP(16) \
		stxvw4x(48, %[cc], %[cbcmac]) \
		\
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
	"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
	"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
	"v30", "ctr", "memory" \
	); \
}

MKCBCMAC(128)
MKCBCMAC(192)
MKCBCMAC(256)

/*
 * MKENCRYPT(size) defines ctrcbc_<size>_encrypt(): combined CTR
 * encryption + CBC-MAC over the produced ciphertext, pipelined so that
 * the counter encryption for block i runs in parallel (X2 macro) with
 * the CBC-MAC encryption of block i-1.
 */
#define MKENCRYPT(size) \
static void \
ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
	size_t num_blocks) \
{ \
	long cc; \
	\
	cc = 0; \
	asm volatile ( \
		\
		/* \
		 * Load subkeys into v0..v10 (v0..v12/v14 for 192/256) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
		\
		BYTESWAP_INIT \
		INCR_128_INIT \
		\
		/* \
		 * Load current CTR counter into v16, and current \
		 * CBC-MAC IV into v17. \
		 */ \
		lxvw4x(48, %[cc], %[ctr]) \
		lxvw4x(49, %[cc], %[cbcmac]) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		\
		/* \
		 * At each iteration, we do two parallel encryption: \
		 * - new counter value for encryption of the next block; \
		 * - CBC-MAC over the previous encrypted block. \
		 * Thus, each plaintext block implies two AES instances, \
		 * over two successive iterations. This requires a single \
		 * counter encryption before the loop, and a single \
		 * CBC-MAC encryption after the loop. \
		 */ \
		\
		/* \
		 * Encrypt first block (into v20). \
		 */ \
		lxvw4x(52, %[cc], %[buf]) \
		BYTESWAP(20) \
		INCR_128(22, 16) \
		BLOCK_ENCRYPT_ ## size(16) \
		vxor(20, 20, 16) \
		BYTESWAPX(21, 20) \
		stxvw4x(53, %[cc], %[buf]) \
		vand(16, 22, 22) \
		addi(%[buf], %[buf], 16) \
		\
		/* \
		 * Load loop counter; skip the loop if there is only \
		 * one block in total (already handled by the boundary \
		 * conditions). \
		 */ \
		mtctr(%[num_blocks]) \
		bdz(fastexit) \
		\
	label(loop) \
		/* \
		 * Upon loop entry: \
		 *   v16   counter value for next block \
		 *   v17   current CBC-MAC value \
		 *   v20   encrypted previous block \
		 */ \
		vxor(17, 17, 20) \
		INCR_128(22, 16) \
		lxvw4x(52, %[cc], %[buf]) \
		BYTESWAP(20) \
		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
		vxor(20, 20, 16) \
		BYTESWAPX(21, 20) \
		stxvw4x(53, %[cc], %[buf]) \
		addi(%[buf], %[buf], 16) \
		vand(16, 22, 22) \
		\
		bdnz(loop) \
		\
	label(fastexit) \
		vxor(17, 17, 20) \
		BLOCK_ENCRYPT_ ## size(17) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		stxvw4x(48, %[cc], %[ctr]) \
		stxvw4x(49, %[cc], %[cbcmac]) \
		\
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
	"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
	"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
	"v30", "ctr", "memory" \
	); \
}

MKENCRYPT(128)
MKENCRYPT(192)
MKENCRYPT(256)

/*
 * MKDECRYPT(size) defines ctrcbc_<size>_decrypt(): combined CTR
 * decryption + CBC-MAC over the received ciphertext. Since the MAC is
 * computed on the block being read (not the one being produced), both
 * AES instances of a block fit in the same iteration — no pre/post
 * loop work is needed, unlike encryption.
 */
#define MKDECRYPT(size) \
static void \
ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
	size_t num_blocks) \
{ \
	long cc; \
	\
	cc = 0; \
	asm volatile ( \
		\
		/* \
		 * Load subkeys into v0..v10 (v0..v12/v14 for 192/256) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
		\
		BYTESWAP_INIT \
		INCR_128_INIT \
		\
		/* \
		 * Load current CTR counter into v16, and current \
		 * CBC-MAC IV into v17. \
		 */ \
		lxvw4x(48, %[cc], %[ctr]) \
		lxvw4x(49, %[cc], %[cbcmac]) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		\
		/* \
		 * At each iteration, we do two parallel encryption: \
		 * - new counter value for decryption of the next block; \
		 * - CBC-MAC over the next encrypted block. \
		 * Each iteration performs the two AES instances related \
		 * to the current block; there is thus no need for some \
		 * extra pre-loop and post-loop work as in encryption. \
		 */ \
		\
		mtctr(%[num_blocks]) \
		\
	label(loop) \
		/* \
		 * Upon loop entry: \
		 *   v16   counter value for next block \
		 *   v17   current CBC-MAC value \
		 */ \
		lxvw4x(52, %[cc], %[buf]) \
		BYTESWAP(20) \
		vxor(17, 17, 20) \
		INCR_128(22, 16) \
		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
		vxor(20, 20, 16) \
		BYTESWAPX(21, 20) \
		stxvw4x(53, %[cc], %[buf]) \
		addi(%[buf], %[buf], 16) \
		vand(16, 22, 22) \
		\
		bdnz(loop) \
		\
		/* \
		 * Store back counter and CBC-MAC value. \
		 */ \
		BYTESWAP(16) \
		BYTESWAP(17) \
		stxvw4x(48, %[cc], %[ctr]) \
		stxvw4x(49, %[cc], %[cbcmac]) \
		\
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
	"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
	"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
	"v30", "ctr", "memory" \
	); \
}

MKDECRYPT(128)
MKDECRYPT(192)
MKDECRYPT(256)

/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	if (len == 0) {
		return;
	}
	/* num_rounds selects the key size; len >> 4 = number of blocks
	   (caller guarantees a multiple of 16, per the vtable API). */
	switch (ctx->num_rounds) {
	case 10:
		ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	case 12:
		ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	default:
		ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	}
}

/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	if (len == 0) {
		return;
	}
	switch (ctx->num_rounds) {
	case 10:
		ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	case 12:
		ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	default:
		ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
		break;
	}
}

/*
 * dst <- src + 1, where src is a 128-bit big-endian integer split into
 * two 64-bit halves. The carry from the low half into the high half is
 * computed branchlessly: after lo++, (lo | -lo) >> 63 is 0 iff lo
 * wrapped to zero, so XOR with 1 yields the carry bit.
 */
static inline void
incr_ctr(void *dst, const void *src)
{
	uint64_t hi, lo;

	hi = br_dec64be(src);
	lo = br_dec64be((const unsigned char *)src + 8);
	lo ++;
	hi += ((lo | -lo) >> 63) ^ (uint64_t)1;
	br_enc64be(dst, hi);
	br_enc64be((unsigned char *)dst + 8, lo);
}

/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *data, size_t len)
{
	unsigned char ctrbuf[64];

	/*
	 * ctrbuf holds four successive counter values (ctr, ctr+1,
	 * ctr+2, ctr+3) for the four-way ctr_xxx() kernels, which
	 * update it in place by steps of 4.
	 */
	memcpy(ctrbuf, ctr, 16);
	incr_ctr(ctrbuf + 16, ctrbuf);
	incr_ctr(ctrbuf + 32, ctrbuf + 16);
	incr_ctr(ctrbuf + 48, ctrbuf + 32);
	if (len >= 64) {
		/* Bulk path: whole 64-byte groups. */
		switch (ctx->num_rounds) {
		case 10:
			ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
			break;
		case 12:
			ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
			break;
		default:
			ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
			break;
		}
		data = (unsigned char *)data + (len & ~(size_t)63);
		len &= 63;
	}
	if (len > 0) {
		unsigned char tmp[64];

		/*
		 * Tail (1..63 bytes): pad into a zero-filled 64-byte
		 * buffer and run one more four-way group. *ctr is
		 * advanced by the number of FULL blocks consumed; a
		 * trailing partial block uses a counter value but does
		 * not advance *ctr (same convention for all lengths).
		 */
		if (len >= 32) {
			if (len >= 48) {
				memcpy(ctr, ctrbuf + 48, 16);
			} else {
				memcpy(ctr, ctrbuf + 32, 16);
			}
		} else {
			if (len >= 16) {
				memcpy(ctr, ctrbuf + 16, 16);
			}
		}
		memcpy(tmp, data, len);
		memset(tmp + len, 0, (sizeof tmp) - len);
		switch (ctx->num_rounds) {
		case 10:
			ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
			break;
		case 12:
			ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
			break;
		default:
			ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
			break;
		}
		memcpy(data, tmp, len);
	} else {
		/* len was a multiple of 64: the asm kernel already
		   advanced ctrbuf[0..15] past all processed blocks. */
		memcpy(ctr, ctrbuf, 16);
	}
}

/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *cbcmac, const void *data, size_t len)
{
	/* len >> 4 blocks; caller provides a multiple of 16 bytes. */
	if (len > 0) {
		switch (ctx->num_rounds) {
		case 10:
			cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
			break;
		case 12:
			cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
			break;
		default:
			cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
			break;
		}
	}
}

/* see bearssl_block.h */
const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
	sizeof(br_aes_pwr8_ctrcbc_keys),
	16,
	4,
	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
	&br_aes_pwr8_ctrcbc_init,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
	&br_aes_pwr8_ctrcbc_encrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
	&br_aes_pwr8_ctrcbc_decrypt,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, size_t))
	&br_aes_pwr8_ctrcbc_ctr,
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, const void *, size_t))
	&br_aes_pwr8_ctrcbc_mac
};

#else

/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
	/* Built without POWER8 crypto support: implementation absent. */
	return NULL;
}

#endif