/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* Wrap a name so that it becomes an assembler-local (.L-prefixed) label. */
#define L(label) .L ## label

/*
 * Register map.
 *
 * Arguments and return value:
 *   src1   (x0)  first buffer
 *   src2   (x1)  second buffer
 *   limit  (x2)  number of bytes to compare; reused as the running
 *                "bytes still to do" counter, which is allowed to go
 *                negative so it can double as a backwards offset
 *   result (w0)  return value (shares its register with src1)
 */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/*
 * Scratch registers:
 *   data1 / data1w / data1h   words loaded from src1 (64-bit, its
 *                             32-bit view, and the high half of a pair)
 *   data2 / data2w / data2h   the same for src2
 *   tmp1                      misalignment of src1 within 16 bytes
 *   tmp2                      not referenced by the code below
 */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

SYM_FUNC_START(__pi_memcmp)
	/* Fewer than 8 bytes in total: handled by the short path. */
	subs	limit, limit, #8
	b.lo	L(short)

	/* Check the first 8 bytes. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(differ)

	/* limit is now (length - 16); positive means more than 16 bytes. */
	subs	limit, limit, #8
	b.gt	L(over16)

	/*
	 * 8..16 bytes: limit is zero or negative, so indexing by it
	 * re-reads the final 8 bytes of each buffer, overlapping the
	 * word that was just checked.
	 */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(differ)

L(over16):
	/* Check bytes 8..15. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	cmp	data1, data2
	b.ne	L(differ)

	/*
	 * With at most 32 bytes in total, everything that is left fits
	 * in one final 16-byte compare.
	 */
	subs	limit, limit, #16
	b.ls	L(tail16)

	/*
	 * Aligning src1 makes the loads overlap data that has already
	 * been checked, so only bother when more than 128 bytes were
	 * requested (32 of them are already accounted for in limit).
	 */
	cmp	limit, #96
	b.ls	L(pair_loop)

	/*
	 * Round src1 down to a 16-byte boundary, move src2 back by the
	 * same amount, and give the stepped-back bytes back to limit.
	 */
	and	tmp1, src1, #15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/*
	 * Main loop: 16 bytes from each buffer per iteration.
	 * limit arrives already reduced by 16 and greater than zero.
	 * The two ccmp instructions force "not equal" as soon as 16 or
	 * fewer bytes remain, so a single b.eq covers both "keep going"
	 * conditions: more data left and both words matching.
	 */
	.p2align 4
L(pair_loop):
	ldp	data1, data1h, [src1], #16
	ldp	data2, data2h, [src2], #16
	subs	limit, limit, #16
	ccmp	data1, data2, #0, hi
	ccmp	data1h, data2h, #0, eq
	b.eq	L(pair_loop)

	/*
	 * Loop left: find out whether it was a real mismatch, checking
	 * the low words first and then the high words.
	 */
	cmp	data1, data2
	b.ne	L(differ)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(differ)

	/*
	 * Final 1-16 bytes.  limit is zero or negative here, so adding
	 * it moves both pointers back to 16 bytes before the end of the
	 * buffers; the loads may be unaligned.
	 */
L(tail16):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(differ)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/*
	 * Turn the words in data1/data2 into a 0, -1 or 1 result.
	 * On little-endian builds the bytes are reversed first so that
	 * the unsigned compare is decided by the earliest byte in memory.
	 */
L(differ):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with the flags already describing the outcome. */
L(flags_to_result):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Short path: fewer than 8 bytes.  limit is in [-8..-1]. */
L(short):
	adds	limit, limit, #4
	b.lo	L(under4)
	/*
	 * At least 4 bytes: compare one 32-bit word.  The w-register
	 * loads clear the upper halves, so the 64-bit compare at
	 * L(differ) can be used unchanged.
	 */
	ldr	data1w, [src1], #4
	ldr	data2w, [src2], #4
	cmp	data1w, data2w
	b.ne	L(differ)
	sub	limit, limit, #4
L(under4):
	/* limit becomes the count of bytes left (0..3); zero means equal. */
	adds	limit, limit, #4
	b.eq	L(flags_to_result)
	/*
	 * Byte-at-a-time tail.  Once the counter reaches zero the ccmp
	 * forces "not equal", which ends the loop; the result is the
	 * difference of the last pair of bytes loaded.
	 */
L(bytewise):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, data2w, #0, ne	/* NZCV = 0b0000.  */
	b.eq	L(bytewise)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)