/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

/* Byte-replicated constants used by the word-at-a-time NUL test. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/*
 * Register roles.  Two pairs deliberately share a register because their
 * live ranges never overlap: diff/off1 (x5) and syndrome/tmp (x6).
 */
#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:       x0 = s1 (src1), x1 = s2 (src2)
 * Out:      x0 = (first differing byte of s1) - (corresponding byte of s2),
 *                both taken as unsigned bytes; 0 when the strings are equal.
 * Clobbers: x1-x10 and the condition flags.
 * The body makes no reference to the stack pointer.
 */
SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1	/* [src1, off2] addresses src2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* src1 offset within its 8-byte word.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Sources differ in alignment mod 8.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, but not on a boundary.  */

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was seen compare the words, otherwise force "ne".  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	/* Put the earliest byte at the most-significant end.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   subtract.  Both operands end up in the range 0..255, so the
	   difference is a correctly signed result.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* Force the preceding bytes to 0xff in both words so they compare
	   equal and cannot be taken for a NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)	/* tmp still holds src1 & 7.  */
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If data1 is not NUL compare the bytes, otherwise force "ne".  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8	/* Aligned word holding the start of src2.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set the bytes that precede the src2 start point to non-zero so
	   the NUL test below ignores them.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1	/* [src1, off1] is the next aligned src2 word.  */

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned src2 word, used for the NUL test.  */
	ldr	data2, [src1, off2]	/* Unaligned src2 data to compare.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Line the NUL markers found in data3 up with the bytes of data2.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* Rebuild the remaining src2 bytes from the already loaded data3
	   instead of reading another word of src2.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte-wise exit: data1/data2 hold the zero-extended bytes.  */
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)