Path: blob/master/arch/loongarch/vdso/vgetrandom-chacha.S
26427 views
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Xi Ruoyao <[email protected]>. All Rights Reserved.
 */

#include <asm/asm.h>
#include <asm/regdef.h>
#include <linux/linkage.h>

.text

/*
 * Apply the three-operand instruction \op lane-wise to four destinations:
 *
 *	d[n] = d[n] \op s[n]	for n = 0, 1, 2, 3
 *
 * The s operands are whatever \op accepts as its last operand; in this file
 * they are either registers (the lineN packs) or immediates (the _NN packs).
 */
.macro	OP_4REG	op d0 d1 d2 d3 s0 s1 s2 s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm

/*
 * Very basic LoongArch implementation of ChaCha20. Produces a given positive
 * number of blocks of output with a nonce of 0, taking an input key and
 * 8-byte counter. Importantly does not spill to the stack. Its arguments
 * are:
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 *
 * The block loop tests nblocks only after a block has been written, so the
 * caller must pass a non-zero block count.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

/* We don't need a frame pointer */
#define s9		fp

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		a5
#define state11		a6
#define state12		a7
#define state13		t0
#define state14		t1
#define state15		t2
#define cnt_lo		t3
#define cnt_hi		t4
#define copy0		t5
#define copy1		t6
#define copy2		t7
#define copy3		t8

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows 1-3 rotated by 1, 2 and 3 positions: the diagonals of the state */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/*
 * Immediates for rotri.w. A right rotation of a 32-bit word by 32 - n is a
 * left rotation by n, so these are ChaCha's left rotations by 16, 12, 8, 7.
 */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * The ABI requires s0-s9 saved, and sp aligned to 16-byte.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack.
	 */
	PTR_ADDI	sp, sp, (-SZREG * 10) & STACK_ALIGN
	REG_S		s0, sp, 0
	REG_S		s1, sp, SZREG
	REG_S		s2, sp, SZREG * 2
	REG_S		s3, sp, SZREG * 3
	REG_S		s4, sp, SZREG * 4
	REG_S		s5, sp, SZREG * 5
	REG_S		s6, sp, SZREG * 6
	REG_S		s7, sp, SZREG * 7
	REG_S		s8, sp, SZREG * 8
	REG_S		s9, sp, SZREG * 9

	/* copy[0,1,2,3] = "expand 32-byte k", kept for the whole call */
	li.w		copy0, 0x61707865
	li.w		copy1, 0x3320646e
	li.w		copy2, 0x79622d32
	li.w		copy3, 0x6b206574

	/* The counter lives in registers until the last block is written */
	ld.w		cnt_lo, counter, 0
	ld.w		cnt_hi, counter, 4

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	move		state0, copy0
	move		state1, copy1
	move		state2, copy2
	move		state3, copy3

	/* state[4,5,..,11] = key */
	ld.w		state4, key, 0
	ld.w		state5, key, 4
	ld.w		state6, key, 8
	ld.w		state7, key, 12
	ld.w		state8, key, 16
	ld.w		state9, key, 20
	ld.w		state10, key, 24
	ld.w		state11, key, 28

	/* state[12,13] = counter */
	move		state12, cnt_lo
	move		state13, cnt_hi

	/* state[14,15] = 0 */
	move		state14, zero
	move		state15, zero

	/* 10 iterations of (odd round, even round) = 20 rounds */
	li.w		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	add.w	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	rotri.w	line3, _16

	OP_4REG	add.w	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	rotri.w	line1, _20

	OP_4REG	add.w	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	rotri.w	line3, _24

	OP_4REG	add.w	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	rotri.w	line1, _25

	/* even round */
	OP_4REG	add.w	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	rotri.w	line3_perm, _16

	OP_4REG	add.w	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	rotri.w	line1_perm, _20

	OP_4REG	add.w	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	rotri.w	line3_perm, _24

	OP_4REG	add.w	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	rotri.w	line1_perm, _25

	addi.w		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	add.w	line0, copy
	st.w		state0, output, 0
	st.w		state1, output, 4
	st.w		state2, output, 8
	st.w		state3, output, 12

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = key words 0-3 (first 16 bytes of the key) */
	ld.w		state0, key, 0
	ld.w		state1, key, 4
	ld.w		state2, key, 8
	ld.w		state3, key, 12

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	add.w	line1, line0
	st.w		state4, output, 16
	st.w		state5, output, 20
	st.w		state6, output, 24
	st.w		state7, output, 28

	/* state[0,1,2,3] = key words 4-7 (last 16 bytes of the key) */
	ld.w		state0, key, 16
	ld.w		state1, key, 20
	ld.w		state2, key, 24
	ld.w		state3, key, 28

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	add.w	line2, line0
	st.w		state8, output, 32
	st.w		state9, output, 36
	st.w		state10, output, 40
	st.w		state11, output, 44

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	add.w		state12, state12, cnt_lo
	add.w		state13, state13, cnt_hi
	st.w		state12, output, 48
	st.w		state13, output, 52
	st.w		state14, output, 56
	st.w		state15, output, 60

	/*
	 * ++counter: state0 (scratch) becomes 1 exactly when cnt_lo wrapped
	 * around to 0, and that carry is added into cnt_hi.
	 */
	addi.w		cnt_lo, cnt_lo, 1
	sltui		state0, cnt_lo, 1
	add.w		cnt_hi, cnt_hi, state0

	/* output += 64 */
	PTR_ADDI	output, output, 64
	/* --nblocks */
	PTR_ADDI	nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	st.w		cnt_lo, counter, 0
	st.w		cnt_hi, counter, 4

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again. As of now copy[0,1,2,3] just contains "expand 32-byte k" and
	 * state[0,...,9] are s0-s9, which we'll restore in the epilogue, so we
	 * only need to zero state[10,...,15].
	 */
	move		state10, zero
	move		state11, zero
	move		state12, zero
	move		state13, zero
	move		state14, zero
	move		state15, zero

	/* Restore the callee-saved registers and release the save area */
	REG_L		s0, sp, 0
	REG_L		s1, sp, SZREG
	REG_L		s2, sp, SZREG * 2
	REG_L		s3, sp, SZREG * 3
	REG_L		s4, sp, SZREG * 4
	REG_L		s5, sp, SZREG * 5
	REG_L		s6, sp, SZREG * 6
	REG_L		s7, sp, SZREG * 7
	REG_L		s8, sp, SZREG * 8
	REG_L		s9, sp, SZREG * 9
	PTR_ADDI	sp, sp, -((-SZREG * 10) & STACK_ALIGN)

	jr		ra
SYM_FUNC_END(__arch_chacha20_blocks_nostack)