/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

.section .noinstr.text, "ax"

/*
 * ISO C memset - fill a block of memory with a single byte value.
 *
 * Syntax: GNU as, AT&T operand order (source, destination).
 *
 * In:
 *	rdi	destination
 *	rsi	fill value (only the low byte, %sil, is used)
 *	rdx	number of bytes to fill
 *
 * Out:
 *	rax	the destination pointer exactly as it was passed in
 *
 * Two implementations live in this file:
 *
 *   - the body of __memset itself, a plain 'rep stosb', reached when the
 *     X86_FEATURE_FSRS alternative has removed the leading jump;
 *   - memset_orig, an open-coded store loop, reached through that jump
 *     otherwise.
 *
 * Ideally the FSRS variant would be emitted inline at the call site, which
 * would avoid both the call and the awkward return-value handling, but that
 * needs compiler support for a better calling convention.  The 'rep stosb'
 * alone is small enough to replace the call; it is the surrounding register
 * moves that bloat the sequence.  Two of those moves exist only to return
 * the destination pointer the caller already has, something the compiler
 * could handle far better on its own.
 */
SYM_TYPED_FUNC_START(__memset)
	/* Jump to the fallback unless the FSRS alternative drops the jump. */
	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS

	movq	%rdi, %r9		/* keep dst: rep stosb advances %rdi */
	movb	%sil, %al		/* stosb stores %al */
	movq	%rdx, %rcx		/* rep counts in %rcx */
	rep stosb
	movq	%r9, %rax		/* return the original dst */
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
SYM_PIC_ALIAS(memset)
EXPORT_SYMBOL(memset)

/*
 * memset_orig - open-coded fallback for __memset.
 *
 * Takes the same register arguments as __memset (rdi = dst, sil = byte,
 * rdx = count) and returns the original dst in rax.
 *
 * Register use inside the routine:
 *	rax	fill byte replicated into all eight byte lanes
 *	rcx	loop counter (64-byte blocks, then 8-byte words)
 *	rdx	bytes remaining
 *	rdi	current write position
 *	r8	bytes needed to reach the next 8-byte boundary
 *	r9	dst & 7
 *	r10	saved copy of the original dst
 */
SYM_FUNC_START_LOCAL(memset_orig)
	movq	%rdi, %r10		/* remember dst for the return value */

	/* Replicate the fill byte into every byte of %rax. */
	movzbl	%sil, %ecx
	movabs	$0x0101010101010101, %rax
	imulq	%rcx, %rax

	/* Is dst already 8-byte aligned? */
	movl	%edi, %r9d
	andl	$7, %r9d
	jnz	.Lmisaligned_dst
.Ldst_aligned:

	/* %rcx = number of whole 64-byte blocks. */
	movq	%rdx, %rcx
	shrq	$6, %rcx
	jz	.Ltail

	.p2align 4
.Lfill_64:
	/*
	 * decq sets ZF for the jnz at the bottom; the movq/leaq in between
	 * leave the flags untouched.
	 */
	decq	%rcx
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi
	jnz	.Lfill_64

	/*
	 * The remainder is finished with small loops; these should beat
	 * jump tables, whose targets are hard to predict.
	 */
	.p2align 4
.Ltail:
	/* Bytes left that still form whole 8-byte words: count & 56. */
	movl	%edx, %ecx
	andl	$63&(~7), %ecx
	jz	.Ltail_bytes
	shrl	$3, %ecx		/* bytes -> number of 8-byte words */
	.p2align 4
.Lfill_8:
	decl	%ecx
	movq	%rax, (%rdi)
	leaq	8(%rdi), %rdi
	jnz	.Lfill_8

.Ltail_bytes:
	/* Last 0..7 bytes, one at a time. */
	andl	$7, %edx
	jz	.Ldone
	.p2align 4
.Lfill_1:
	decl	%edx
	movb	%al, (%rdi)
	leaq	1(%rdi), %rdi
	jnz	.Lfill_1

.Ldone:
	movq	%r10, %rax		/* return the original dst */
	RET

.Lmisaligned_dst:
	/* Fewer than 8 bytes in total: the byte loop does all the work. */
	cmpq	$7, %rdx
	jbe	.Ltail_bytes
	/*
	 * Cover the leading bytes with a single unaligned 8-byte store,
	 * then step dst up to the next 8-byte boundary and shrink the
	 * count by the same amount.  The bytes of that store lying past
	 * the boundary are simply written again, with the same value, by
	 * the aligned code.
	 */
	movq	%rax, (%rdi)
	movq	$8, %r8
	subq	%r9, %r8		/* r8 = 8 - (dst & 7) */
	addq	%r8, %rdi
	subq	%r8, %rdx
	jmp	.Ldst_aligned
.Lfinal:				/* not referenced anywhere in this file */
SYM_FUNC_END(memset_orig)