/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Determine the length of a fixed-size string.
 *
 * At most `limit` bytes are considered. The result is the offset of the
 * first NUL byte, or `limit` itself when no NUL byte lies within the first
 * `limit` bytes.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - length of the string, capped at the maximal string length
 * Clobbers:
 *	x2-x14 and the NZCV flags. The stack is not used.
 *
 * Note: data is fetched in 16-byte blocks from a pointer rounded down to a
 * 16-byte boundary, so bytes in front of the string and bytes behind its
 * terminator/limit (inside the same aligned block) may be read. Leading
 * bytes are masked out and trailing bytes are discarded by the final
 * MIN(len, limit), so neither affects the result.
 */

/* Arguments and results. */
srcin		.req	x0
len		.req	x0	/* Shares x0 with srcin: result replaces the argument. */
limit		.req	x1

/* Locals and temporaries. */
src		.req	x2
data1		.req	x3
data2		.req	x4
data2a		.req	x5
has_nul1	.req	x6
has_nul2	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12
pos		.req	x13
limit_wd	.req	x14

/* Byte patterns replicated across all eight bytes of a 64-bit word. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
/* REP8_80 is not referenced by the code in this file. */
#define REP8_80 0x8080808080808080

SYM_FUNC_START(__pi_strnlen)
	cbz	limit, .Lhit_limit	/* limit == 0: return 0. */
	mov	zeroones, #REP8_01
	bic	src, srcin, #15		/* src = srcin rounded down to 16 bytes. */
	ands	tmp1, srcin, #15	/* tmp1 = misalignment in bytes (0..15). */
	b.ne	.Lmisaligned
	/* Calculate the number of full and partial Qwords, minus one. */
	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords (16-byte units). */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
	/*
	 * The inner loop deals with two Dwords at a time. This has a
	 * slightly higher start-up cost, but we should win quite quickly,
	 * especially on cores with a high number of issue slots per
	 * cycle, as we get much better parallelism out of the operations.
	 */
.Lloop:
	ldp	data1, data2, [src], #16	/* Load 16 bytes, then src += 16. */
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2	/* Syndrome for the first Dword. */
	bic	has_nul2, tmp3, tmp4	/* Syndrome for the second Dword. */
	subs	limit_wd, limit_wd, #1	/* Goes negative after the last Qword. */
	orr	tmp1, has_nul1, has_nul2
	/*
	 * While the Qword counter is still non-negative (pl), compare the
	 * combined syndrome with zero. Once it has gone negative, force
	 * the flags to 0000 so that the b.eq below falls through.
	 */
	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000 */
	b.eq	.Lloop			/* No NUL yet and Qwords remain. */

	cbz	tmp1, .Lhit_limit	/* No NUL in final Qword. */

	/*
	 * We know there's a NUL in the final Qword. The easiest thing
	 * to do now is work out the length of the string and return
	 * MIN (len, limit).
	 */
	sub	len, src, srcin		/* Offset just past the loaded Qword. */
	cbz	has_nul1, .Lnul_in_data2
CPU_BE( mov	data2, data1 )	/* Prepare data to re-calculate the syndrome. */

	sub	len, len, #8		/* NUL is in the first Dword. */
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	 * For big-endian, carry propagation (if the final byte in the
	 * string is 0x01) means we cannot use has_nul directly. The
	 * easiest way to get the correct byte is to byte-swap the data
	 * and calculate the syndrome a second time.
	 */
CPU_BE( rev	data2, data2 )
CPU_BE( sub	tmp1, data2, zeroones )
CPU_BE( orr	tmp2, data2, #REP8_7f )
CPU_BE( bic	has_nul2, tmp1, tmp2 )

	sub	len, len, #8		/* Offset of the Dword holding the NUL. */
	rev	has_nul2, has_nul2	/* Bring the earliest byte to the top. */
	clz	pos, has_nul2		/* Bit index of the first marked byte. */
	add	len, len, pos, lsr #3	/* Bits to bytes. */
	cmp	len, limit
	csel	len, len, limit, ls	/* Return the lower value (unsigned). */
	ret

.Lmisaligned:
	/*
	 * Deal with a partial first word.
	 * We're doing two things in parallel here:
	 * 1) Calculate the number of words (but avoiding overflow if
	 *    limit is near ULONG_MAX) - to do this we need to work out
	 *    limit + tmp1 - 1 as a 65-bit value before shifting it;
	 * 2) Load and mask the initial data words - we force the bytes
	 *    before the ones we are interested in to 0xff - this ensures
	 *    early bytes will not hit any zero detection.
	 */
	ldp	data1, data2, [src], #16

	/* limit_wd = (limit - 1 + tmp1) >> 4, split so it cannot overflow. */
	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15
	lsr	limit_wd, limit_wd, #4

	add	tmp3, tmp3, tmp1
	add	limit_wd, limit_wd, tmp3, lsr #4

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits. */

	mov	tmp2, #~0
	/* Big-endian. Early bytes are at MSB. */
CPU_BE( lsl	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63). */

	cmp	tmp1, #8

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	/*
	 * Misalignment <= 8: the string starts in (or right after) the
	 * first Dword, so keep the masked data1 and the untouched data2.
	 * Misalignment > 8: the whole first Dword precedes the string, so
	 * force data1 to all ones and use the masked copy of data2.
	 */
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	mov	len, limit		/* No NUL within limit: return limit. */
	ret
SYM_FUNC_END(__pi_strnlen)

SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)