/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <[email protected]>
 *	Tim Chen <[email protected]>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * preprocessed by cpp.  The argument registers below follow the order
 * rdi, rsi, rdx.
 */
#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Points at K256 + 32*4 (the middle of the table); see do_4rounds. */
#define SHA256CONSTANTS	%rax

#define MSG		%xmm0	/* sha256rnds2 implicit operand */
#define STATE0		%xmm1
#define STATE1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define TMP		%xmm7

#define SHUF_MASK	%xmm8

#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds - emit the code for SHA-256 rounds \i .. \i+3.
 *
 *   \i   index of the first of the four rounds (0, 4, ..., 60)
 *   \m0  message register that supplies the schedule words added to the
 *        round constants for these four rounds
 *   \m1, \m2, \m3
 *        the other three message registers; the caller rotates the four
 *        registers by one position on each invocation
 *
 * On entry STATE0 is the source and STATE1 the destination of the first
 * sha256rnds2; the second sha256rnds2 swaps the two roles.
 * Clobbers MSG and TMP.
 *
 * Constants are addressed as (\i-32)*4(SHA256CONSTANTS) with
 * SHA256CONSTANTS = K256 + 32*4, so for \i in 0..60 the displacement
 * ranges over -128..112 (presumably chosen so that every displacement
 * fits in a signed byte).
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	/*
	 * The first 16 schedule words come straight from the data block:
	 * load 16 bytes and reverse the byte order of each 32-bit word.
	 */
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	/* MSG = K[\i .. \i+3] + \m0 */
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	/* Two rounds, consuming the low 64 bits of MSG. */
	sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	/* Finish the schedule words held in \m1 using \m0 and \m3. */
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	/* Move the high 64 bits of MSG down for the next two rounds. */
	punpckhqdq	MSG, MSG
	sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	/* Start the schedule computation for the words held in \m3. */
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 block function
 *
 * This function takes a pointer to the current SHA-256 state, a pointer to the
 * input data, and the number of 64-byte blocks to process. Once all blocks
 * have been processed, the state is updated with the new state. This function
 * only processes complete blocks. State initialization, buffering of partial
 * blocks, and digest finalization are expected to be handled elsewhere.
 *
 * void sha256_ni_transform(struct sha256_block_state *state,
 *			    const u8 *data, size_t nblocks);
 *
 * In:    STATE_PTR (%rdi) = state, read and written as 2 x 16 bytes
 *        DATA_PTR  (%rsi) = data, read 64 bytes per block
 *        NUM_BLKS  (%rdx) = nblocks
 * Out:   *state updated in place; no return value
 * Clobb: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 * Stack: not used
 *
 * nblocks == 0 is a no-op: the function returns without reading data or
 * touching the state.  The block loop below tests its exit condition only
 * after a block has been processed, so without this early exit a zero
 * count would hash memory that the caller never provided.
 * NOTE(review): callers may already guarantee nblocks >= 1; the guard
 * costs a single not-taken branch in that case.
 */
.text
SYM_FUNC_START(sha256_ni_transform)

	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* shl set ZF: nothing to do */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256+32*4(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 4 iterations x 4 invocations x 4 rounds = 64 rounds per block */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(STATE_PTR)
	movdqu		STATE0, 1*16(STATE_PTR)

.Ldone_hash:
	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * The 64 SHA-256 round constants, four per row.  The table is 64-byte
 * aligned and do_4rounds reads it with aligned 16-byte loads (movdqa).
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/* pshufb control that reverses the byte order within each 32-bit word. */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203
