Path: blob/main/contrib/arm-optimized-routines/string/aarch64/memset.S
39491 views
/* memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include "asmdefs.h"

/* Register roles (AAPCS64):
 *   in:  x0 = dstin (destination, returned unmodified in x0)
 *        w1 = valw  (fill byte in the low 8 bits)
 *        x2 = count (number of bytes to fill)
 *   scratch: x3 (dst/off share a register — never live at the same time),
 *            x4 (dstend), x5 (zva_val/dstend2 — likewise never both live).
 *   x0 is never written, so the memset return value is simply dstin.  */
#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5
#define off x3
#define dstend2 x5

ENTRY (__memset_aarch64)
	/* Replicate the fill byte into all 16 lanes of v0 for SIMD stores.
	   (dup reads the whole of valw, but only the low byte matters for
	   the q0/s0 stores since every lane is the same 32-bit pattern built
	   from w1; callers pass an int whose low byte is the fill value.
	   NOTE(review): upstream relies on callers following the memset
	   contract here — confirm valw's upper bits are replicated byte,
	   which holds because dup copies w1 per 8-bit lane.)  */
	dup	v0.16B, valw
	cmp	count, 16
	b.lo	L(set_small)

	add	dstend, dstin, count	/* dstend = one past the last byte.  */
	cmp	count, 64
	b.hs	L(set_128)

	/* Set 16..63 bytes.  Branchless: count>>1 has bit 4 set exactly
	   when count >= 32, so `and off, 16, count lsr 1` yields
	   off = 16 if count >= 32 else 0.  The four 16-byte stores at
	   dstin, dstin+off, dstend-16-off, dstend-16 then overlap as
	   needed to cover any length in [16, 63].  */
	mov	off, 16
	and	off, off, count, lsr 1
	sub	dstend2, dstend, off
	str	q0, [dstin]
	str	q0, [dstin, off]
	str	q0, [dstend2, -16]
	str	q0, [dstend, -16]
	ret

	.p2align 4
	/* Set 0..15 bytes.  */
L(set_small):
	add	dstend, dstin, count
	cmp	count, 4
	b.lo	2f
	/* 4..15 bytes: off = count>>3 is 0 or 1; the four 4-byte stores at
	   dstin, dstin+4*off, dstend-4-4*off, dstend-4 overlap to cover
	   any length in [4, 15].  */
	lsr	off, count, 3
	sub	dstend2, dstend, off, lsl 2
	str	s0, [dstin]
	str	s0, [dstin, off, lsl 2]
	str	s0, [dstend2, -4]
	str	s0, [dstend, -4]
	ret

	/* Set 0..3 bytes.  Byte stores at dstin, dstin+count/2 and
	   dstend-1 cover lengths 1..3; length 0 bails out first.  */
2:	cbz	count, 3f
	lsr	off, count, 1
	strb	valw, [dstin]
	strb	valw, [dstin, off]
	strb	valw, [dstend, -1]
3:	ret

	.p2align 4
L(set_128):
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16.  */
	cmp	count, 128
	b.hi	L(set_long)
	/* 64..128 bytes: two 32-byte pairs from the front and two from the
	   back; they overlap in the middle for counts below 128.  */
	stp	q0, q0, [dstin]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	/* > 128 bytes.  Write the (possibly unaligned) head first.  */
	str	q0, [dstin]
	str	q0, [dst, 16]
	/* DC ZVA can only write zero bytes, so take the store loop unless
	   the fill value is zero (all bytes of val are equal, so testing
	   the low 8 bits of valw is sufficient).  */
	tst	valw, 255
	b.ne	L(no_zva)
#ifndef SKIP_ZVA_CHECK
	/* DCZID_EL0[3:0] holds log2(words) of the ZVA block; 4 means
	   16 words = 64 bytes.  Any other block size (or DZP set in the
	   masked bits) falls back to the plain store loop.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	stp	q0, q0, [dst, 32]	/* Fill up to the first 64-aligned line.  */
	bic	dst, dstin, 63		/* dst = dstin rounded down to 64.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */

	/* Write last bytes before ZVA loop.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]

	.p2align 4
	/* Zero one whole 64-byte cache line per iteration; dst is
	   64-aligned and the biased count guarantees every dc zva target
	   lies strictly inside [dstin, dstend-64).  */
L(zva64_loop):
	add	dst, dst, 64
	dc	zva, dst
	subs	count, count, 64
	b.hi	L(zva64_loop)
	ret

	.p2align 3
L(no_zva):
	sub	count, dstend, dst	/* Count is 32 too large.  */
	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
	/* Store 64 bytes per iteration from the 16-aligned dst; the final
	   (up to 64) tail bytes are written with two overlapping pairs
	   anchored at dstend after the loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]
	add	dst, dst, 64
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)