Path: blob/main/lib/libc/amd64/string/timingsafe_memcmp.S
39486 views
/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * int timingsafe_memcmp(const void *rdi, const void *rsi, size_t rdx)
 *
 * Syntax: AT&T (GAS), run through the C preprocessor.
 * In:     rdi = first buffer, rsi = second buffer, rdx = length in bytes.
 * Out:    eax = 0 if the buffers are equal, negative if the first buffer
 *         sorts before the second, positive otherwise.  Bytes are compared
 *         as unsigned values with the lowest address most significant,
 *         which is why every path converts its loads to big endian.
 * Writes: rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.  No stack memory
 *         is touched and none of rbx, rbp, r12-r15 is written.
 *
 * Every conditional branch in this routine tests only the length (rdx) or
 * the loop offset (rcx) against the length.  Decisions that depend on
 * buffer contents are made with CMOVcc, SBB and SETcc instead of branches.
 * The instruction order is therefore significant: the flags produced by
 * each CMP/SUB are consumed by the instructions directly following it.
 *
 * Lengths up to 16 are handled by loading a "head" from the start of each
 * buffer and a "tail" ending at its last byte.  The two loads overlap when
 * the length is not exactly twice the load width; this is harmless since a
 * byte present in both loads either differs in the (more significant)
 * head or is equal in both.
 */
ENTRY(timingsafe_memcmp)
	cmp	$16, %rdx		# at least 17 bytes to process?
	ja	.Lgt16

	/* rdx <= 16 from here on, so its low 32 bits suffice */
	cmp	$8, %edx		# at least 9 bytes to process?
	ja	.L0916

	cmp	$4, %edx		# at least 5 bytes to process?
	ja	.L0508

	cmp	$2, %edx		# at least 3 bytes to process?
	ja	.L0304

	test	%edx, %edx		# buffer empty?
	jnz	.L0102

	xor	%eax, %eax		# empty buffer always matches
	ret

	/*
	 * 1--2 bytes: build a 16-bit big-endian value per buffer with the
	 * first byte in bits 8-15 and the last byte in bits 0-7 (for length
	 * 1 both are the same byte).  The upper 16 bits of eax and ecx are
	 * zero, so the plain difference already has the right sign.
	 */
.L0102:	movzbl	-1(%rdi, %rdx, 1), %eax	# load 1--2 bytes from first buffer
	movzbl	-1(%rsi, %rdx, 1), %ecx	# same for the second buffer
	mov	(%rdi), %ah		# in big endian
	mov	(%rsi), %ch
	sub	%ecx, %eax		# result is the difference of the two values
	ret

	/*
	 * 3--4 bytes: 16-bit head in eax/esi, 16-bit tail in ecx/edx.
	 * After BSWAP the tails occupy the upper 16 bits of ecx/edx; after
	 * ROLW the heads occupy the lower 16 bits of eax/esi.  SUB/SBB then
	 * subtract esi:edx from eax:ecx as one multi-word value.  As the
	 * heads are below 65536, eax is negative afterwards exactly when
	 * the first buffer is less, so no separate sign extraction is needed.
	 */
.L0304:	movzwl	-2(%rdi, %rdx, 1), %ecx	# tail of first buffer
	movzwl	-2(%rsi, %rdx, 1), %edx	# tail of second buffer
	movzwl	(%rdi), %eax		# head of first buffer
	movzwl	(%rsi), %esi		# head of second buffer
	bswap	%ecx			# convert to big endian
	bswap	%edx			# ditto for edx, (e)ax, and (e)si
	rol	$8, %ax			# ROLW is used here so the upper two
	rol	$8, %si			# bytes stay clear, allowing us to
	sub	%edx, %ecx		# save a SBB compared to .L0508
	sbb	%esi, %eax		# heads minus the borrow from the tails
	or	%eax, %ecx		# nonzero if not equal
	setnz	%al			# force a nonzero result if not equal
	ret

	/*
	 * 5--8 bytes: 32-bit head in edi/esi, 32-bit tail in ecx/edx.
	 * SUB/SBB subtract esi:edx from edi:ecx; the final borrow says
	 * whether the first buffer is less.
	 */
.L0508:	mov	-4(%rdi, %rdx, 1), %ecx	# tail of first buffer
	mov	-4(%rsi, %rdx, 1), %edx	# tail of second buffer
	mov	(%rdi), %edi		# head of first buffer (pointer no longer needed)
	mov	(%rsi), %esi		# head of second buffer
	bswap	%ecx			# compare in big endian
	bswap	%edx
	bswap	%edi
	bswap	%esi
	sub	%edx, %ecx		# tails first, producing the borrow
	sbb	%esi, %edi		# heads minus that borrow
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%edi, %ecx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/*
	 * 9--16 bytes: same scheme as .L0508 with 64-bit loads.
	 */
.L0916:	mov	-8(%rdi, %rdx, 1), %rcx	# tail of first buffer
	mov	-8(%rsi, %rdx, 1), %rdx	# tail of second buffer (length no longer needed)
	mov	(%rdi), %rdi		# head of first buffer
	mov	(%rsi), %rsi		# head of second buffer
	bswap	%rcx			# compare in big endian
	bswap	%rdx
	bswap	%rdi
	bswap	%rsi
	sub	%rdx, %rcx		# tails first, producing the borrow
	sbb	%rsi, %rdi		# heads minus that borrow
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%rdi, %rcx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/*
	 * compare 17+ bytes
	 *
	 * r8/r9 hold the first 8-byte pair (first/second buffer) found to
	 * differ, or the most recently examined pair while everything has
	 * been equal so far.  rcx is the offset one past the 16-byte chunk
	 * the main loop looks at next.  Once r8 != r9 the CMOVE instructions
	 * below no longer replace them, but their memory operands are still
	 * part of every iteration.
	 */
.Lgt16:	mov	(%rdi), %r8		# process first 16 bytes
	mov	(%rsi), %r9
	mov	$32, %ecx		# next chunk covers offsets 16..31
	cmp	%r8, %r9		# mismatch in head?
	cmove	8(%rdi), %r8		# if not, try second pair
	cmove	8(%rsi), %r9
	cmp	%rdx, %rcx		# would the next chunk reach the buffer end?
	jae	.Ltail			# then only the tail remains (length <= 32)

	/* main loop processing 16 bytes per iteration */
	ALIGN_TEXT
0:	mov	-16(%rdi, %rcx, 1), %r10	# first pair of this chunk
	mov	-16(%rsi, %rcx, 1), %r11
	cmp	%r10, %r11		# mismatch in first pair?
	cmove	-8(%rdi, %rcx, 1), %r10	# if not, try second pair
	cmove	-8(%rsi, %rcx, 1), %r11
	cmp	%r8, %r9		# was there a mismatch previously?
	cmove	%r10, %r8		# apply new pair if there was not
	cmove	%r11, %r9
	add	$16, %rcx		# advance to the next chunk
	cmp	%rdx, %rcx
	jb	0b			# continue while the chunk ends before the buffer end

	/*
	 * The final 16 bytes of the buffers are handled here; they may
	 * overlap bytes already examined above.  r10/r11 always receive the
	 * last 8 bytes.  r8/r9 keep an earlier mismatch if there is one,
	 * otherwise they receive the 8 bytes preceding the last 8.  The
	 * result is the sign of r8:r10 - r9:r11, with r8/r9 most significant.
	 */
.Ltail:	mov	-8(%rdi, %rdx, 1), %r10	# last 8 bytes of each buffer
	mov	-8(%rsi, %rdx, 1), %r11
	cmp	%r8, %r9		# mismatch found so far?
	cmove	-16(%rdi, %rdx, 1), %r8	# if not, take the 8 bytes before the last 8
	cmove	-16(%rsi, %rdx, 1), %r9
	bswap	%r10			# compare in big endian
	bswap	%r11
	bswap	%r8
	bswap	%r9
	sub	%r11, %r10		# low halves first, producing the borrow
	sbb	%r9, %r8		# high halves minus that borrow
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%r10, %r8		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret
END(timingsafe_memcmp)

	/* empty note section; by GNU toolchain convention this marks the
	 * object as not requiring an executable stack */
	.section .note.GNU-stack,"",%progbits