Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strcmp.S
39486 views
/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Note the deliberate aliasing: diff/off1 share x5 and
   syndrome/tmp share x6, so the members of each pair are never live at
   the same time.  */
#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* __strcmp_aarch64
   In:     src1 (x0), src2 (x1) = addresses of the two strings.
   Out:    result (x0) = first differing byte of string 1 minus the
	   corresponding byte of string 2 (both zero-extended); 0 when the
	   strings are equal up to and including the NUL terminator.
   Uses:   x2-x10 and the condition flags as scratch; the stack is not
	   touched.
   Paths:  - both pointers 8-byte aligned: L(loop_aligned);
	   - same misalignment modulo 8: L(mutual_align), then the aligned
	     loop;
	   - different misalignment: L(misaligned8) / L(loop_unaligned).  */
ENTRY (__strcmp_aarch64)
	sub	off2, src2, src1	/* src1 + off2 always addresses src2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Non-zero if src1 is not 8-byte aligned.  */
	tst	off2, 7			/* Do the pointers differ modulo 8?  */
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was found (eq), compare the two words; otherwise force
	   NZCV = 0 so that the b.eq below falls through.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
	/* On little-endian the earliest byte is in the low bits; byte-reverse
	   so that clz locates the earliest flagged byte.  */
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	/* Register-specified shifts use the amount modulo 64, so the negated
	   value acts as 64 - 8 * (src2 & 7).  */
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* Force the bytes before the start point to 0xff in both words: they
	   then compare equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If the byte is NUL the compare is skipped and NZCV is forced to 0,
	   so "ne" below means "NUL reached or bytes differ".  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now 8-byte aligned.  Check the aligned word that contains
	   the current src2 position for a NUL before any unaligned load that
	   would extend beyond it.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set a bit in every byte that precedes the src2 start point so that
	   those bytes are not reported as NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* src1 + off1 addresses the next aligned word of src2.  off1 shares
	   x5 with diff; it is only needed inside the loop below.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned src2 word, used for the NUL check.  */
	ldr	data2, [src1, off2]	/* Unaligned src2 data to compare.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq	/* Compare only when no NUL was seen.  */
	b.eq	L(loop_unaligned)

	/* Move the NUL flags found in data3 to the matching byte positions
	   of data2; flags for bytes beyond data2 are shifted out.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* The NUL lies in the part of data3 that has not been compared yet.
	   Rebuild the remaining src2 bytes from data3 rather than loading
	   past it, and compare them against the current src1 word.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte-loop exit: both bytes were zero-extended by ldrb.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)
