Path: blob/master/arch/riscv/crypto/aes-riscv64-zvkned.S
54609 views
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')

#include <linux/linkage.h>

.text
.option arch, +zvkned

#include "aes-macros.S"

// Argument registers, per the RISC-V calling convention.
#define KEYP		a0	// const struct crypto_aes_ctx *key
#define INP		a1	// const u8 *in
#define OUTP		a2	// u8 *out
#define LEN		a3	// size_t len (bytes)
#define IVP		a4	// u8 iv[16] (CBC modes only)

// ECB-encrypt or ECB-decrypt the message with a fixed-size key schedule.
// Processes as many blocks per iteration as vsetvli grants (LMUL=8).
.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2
	// t0 is the remaining length in 32-bit words.  It's a multiple of 4.
1:
	vsetvli		t1, t0, e32, m8, ta, ma
	sub		t0, t0, t1	// Subtract number of words processed
	slli		t1, t1, 2	// Words to bytes
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		t0, 1b

	ret
.endm

// Dispatch to the correct key-length specialization of __aes_ecb_crypt.
// aes_begin branches to 128f or 192f based on the key length; fallthrough
// handles AES-256.
.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f
	__aes_ecb_crypt	\enc, 256
128:
	__aes_ecb_crypt	\enc, 128
192:
	__aes_ecb_crypt	\enc, 192
.endm

// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)

// CBC encryption is inherently serial (each block's input depends on the
// previous ciphertext block), so this processes one block per iteration.
.macro	aes_cbc_encrypt	keylen
	vle32.v		v16, (IVP)	// Load IV
1:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		INP, INP, 16
	addi		OUTP, OUTP, 16
	addi		LEN, LEN, -16
	bnez		LEN, 1b

	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

// CBC decryption can be parallelized: decrypt a group of blocks, then XOR
// each with the preceding ciphertext block (slid into place with
// vslideup/vslidedown).
.macro	aes_cbc_decrypt	keylen
	srli		LEN, LEN, 2	// Convert LEN from bytes to words
	vle32.v		v16, (IVP)	// Load IV
1:
	vsetvli		t0, LEN, e32, m4, ta, ma
	vle32.v		v20, (INP)	// Load ciphertext blocks
	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
	addi		t1, t0, -4
	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
	aes_decrypt	v20, \keylen	// Decrypt the blocks
	vxor.vv		v20, v20, v16	// XOR with prev ciphertext blocks
	vse32.v		v20, (OUTP)	// Store plaintext blocks
	vmv.v.v		v16, v24	// Next "IV" is last ciphertext block
	slli		t1, t0, 2	// Words to bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	sub		LEN, LEN, t0
	bnez		LEN, 1b

	vsetivli	zero, 4, e32, m1, ta, ma
	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_encrypt	256
128:
	aes_cbc_encrypt	128
192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_decrypt	256
128:
	aes_cbc_decrypt	128
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

.macro	aes_cbc_cts_encrypt	keylen

	// CBC-encrypt all blocks except the last.  But don't store the
	// second-to-last block to the output buffer yet, since it will be
	// handled specially in the ciphertext stealing step.  Exception: if the
	// message is single-block, still encrypt the last (and only) block.
	li		t0, 16
	j		2f
1:
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		OUTP, OUTP, 16
2:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	addi		INP, INP, 16
	addi		LEN, LEN, -16
	bgt		LEN, t0, 1b	// Repeat if more than one block remains

	// Special case: if the message is a single block, just do CBC.
	beqz		LEN, .Lcts_encrypt_done\@

	// Encrypt the last two blocks using ciphertext stealing as follows:
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
	// plaintext block.  Block n, the last block, may be partial; its length
	// is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means the IV.
	//
	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
	// INP points to P[n].  OUTP points to where C[n-1] should go.
	// To support in-place encryption, load P[n] before storing C[n].
	addi		t0, OUTP, 16	// Get pointer to where C[n] should go
	vsetvli		zero, LEN, e8, m1, tu, ma
	vle8.v		v17, (INP)	// Load P[n]
	vse8.v		v16, (t0)	// Store C[n]
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
	ret
.endm

#define LEN32		t4	// Length of remaining full blocks in 32-bit words
#define LEN_MOD16	t5	// Length of message in bytes mod 16

.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN32, LEN, ~15
	srli		LEN32, LEN32, 2
	andi		LEN_MOD16, LEN, 15

	// Save C[n-2] in v28 so that it's available later during the ciphertext
	// stealing step.  If there are fewer than three blocks, C[n-2] means
	// the IV, otherwise it means the third-to-last ciphertext block.
	vmv.v.v		v28, v16	// IV
	add		t0, LEN, -33
	bltz		t0, .Lcts_decrypt_loop\@
	andi		t0, t0, ~15
	add		t0, t0, INP
	vle32.v		v28, (t0)

	// CBC-decrypt all full blocks.  For the last full block, or the last 2
	// full blocks if the message is block-aligned, this doesn't write the
	// correct output blocks (unless the message is only a single block),
	// because it XORs the wrong values with the raw AES plaintexts.  But we
	// fix this after this loop without redoing the AES decryptions.  This
	// approach allows more of the AES decryptions to be parallelized.
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma
	addi		t1, t0, -4
	vle32.v		v20, (INP)	// Load next set of ciphertext blocks
	vmv.v.v		v24, v16	// Get IV or last ciphertext block of prev set
	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
	vxor.vv		v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
	vse32.v		v24, (OUTP)	// Store this set of plaintext blocks
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2	// Words to bytes
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
	addi		t0, OUTP, -16	// Get pointer to last full plaintext block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// Special case: if the message is a single block, just do CBC.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message.  Just fix up the last 2 blocks.  We need:
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
	// is everything needed to fix the output without re-decrypting blocks.
	addi		t1, OUTP, -32	// Get pointer to where P[n-1] should go
	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
	vle32.v		v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)	// Store P[n-1]
	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Decrypt the last two blocks using ciphertext stealing as follows:
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma
	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)	// Store P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28	// XOR with C[n-2]
	vse32.v		v20, (t0)	// Store last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm

// Select encryption vs decryption based on the |enc| argument in a5.
// Note: aes_cbc_cts_encrypt ends in ret, so there is no fallthrough into
// the decrypt path.
.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)	// Load IV
	beqz		a5, .Lcts_decrypt\@
	aes_cbc_cts_encrypt \keylen
.Lcts_decrypt\@:
	aes_cbc_cts_decrypt \keylen
.endm

// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//			         const u8 *in, u8 *out, size_t len,
//			         const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_cts_crypt 256
128:
	aes_cbc_cts_crypt 128
192:
	aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)