GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/lib/memset_64.S
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
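
/*
 * This file carries three memset() implementations: the generic
 * qword/byte loop at ENTRY(memset) below, a "fast string" variant
 * built around rep stosq (.Lmemset_c), and an ERMS variant that is a
 * bare rep stosb (.Lmemset_c_e). The .altinstructions entries at the
 * end of the file let the kernel patch in the best variant for the
 * running CPU at boot.
 */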
/*
 * ISO C memset - set a memory block to a byte value. This function uses fast
 * string to get better performance than the original function. The code is
 * simpler and shorter than the original function as well.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 */
        .section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
        movq %rdi,%r9
        movl %edx,%r8d
        andl $7,%r8d
        movl %edx,%ecx
        shrl $3,%ecx
        /* expand byte value */
        movzbl %sil,%esi
        movabs $0x0101010101010101,%rax
        mulq %rsi               /* with rax, clobbers rdx */
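        /*
         * Multiplying the zero-extended byte by 0x0101010101010101
         * replicates it into every byte lane, e.g.
         *   0xab * 0x0101010101010101 = 0xabababababababab.
         * The product always fits in 64 bits, so mulq only zeroes
         * %rdx here; the replicated pattern left in %rax is what
         * rep stosq stores eight bytes at a time.
         */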
        rep stosq
        movl %r8d,%ecx
        rep stosb
        movq %r9,%rax
        ret
.Lmemset_e:
        .previous

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * enhanced rep stosb to override the fast string function.
 * The code is simpler and shorter than the fast string function as well.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 */
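
/*
 * With ERMS (enhanced REP MOVSB/STOSB), the microcoded rep stosb is
 * expected to handle short lengths and unaligned destinations well on
 * its own, which is why this variant needs neither byte-value
 * expansion nor separate tail handling.
 */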
        .section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
        movq %rdi,%r9
        movb %sil,%al
        movl %edx,%ecx
        rep stosb
        movq %r9,%rax
        ret
.Lmemset_e_e:
        .previous

ENTRY(memset)
ENTRY(__memset)
        CFI_STARTPROC
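        /*
         * Save the destination and count up front: %rax is about to
         * hold the fill pattern, mul clobbers %rdx, and %rdi advances
         * with every store, yet memset must return the original
         * destination in %rax.
         */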
        movq %rdi,%r10
        movq %rdx,%r11

        /* expand byte value */
        movzbl %sil,%ecx
        movabs $0x0101010101010101,%rax
        mul %rcx                /* with rax, clobbers rdx */

        /* align dst */
        movl %edi,%r9d
        andl $7,%r9d
        jnz .Lbad_alignment
        CFI_REMEMBER_STATE
.Lafter_bad_alignment:

        movl %r11d,%ecx
        shrl $6,%ecx
        jz .Lhandle_tail

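        /*
         * %ecx now holds count/64; each pass of .Lloop_64 below
         * stores 64 bytes with eight qword writes.
         */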
82
.p2align 4
83
.Lloop_64:
84
decl %ecx
85
movq %rax,(%rdi)
86
movq %rax,8(%rdi)
87
movq %rax,16(%rdi)
88
movq %rax,24(%rdi)
89
movq %rax,32(%rdi)
90
movq %rax,40(%rdi)
91
movq %rax,48(%rdi)
92
movq %rax,56(%rdi)
93
leaq 64(%rdi),%rdi
94
jnz .Lloop_64
95
96
/* Handle tail in loops. The loops should be faster than hard
97
to predict jump tables. */
98
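        /*
         * At most 63 bytes remain at this point: bits 3-5 of the
         * count ($63&(~7) == 56) give the number of whole qwords
         * still to store, bits 0-2 ($7) the final byte remainder.
         */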
        .p2align 4
.Lhandle_tail:
        movl %r11d,%ecx
        andl $63&(~7),%ecx
        jz .Lhandle_7
        shrl $3,%ecx
        .p2align 4
.Lloop_8:
        decl %ecx
        movq %rax,(%rdi)
        leaq 8(%rdi),%rdi
        jnz .Lloop_8

.Lhandle_7:
        movl %r11d,%ecx
        andl $7,%ecx
        jz .Lende
        .p2align 4
.Lloop_1:
        decl %ecx
        movb %al,(%rdi)
        leaq 1(%rdi),%rdi
        jnz .Lloop_1

.Lende:
        movq %r10,%rax
        ret

        CFI_RESTORE_STATE
.Lbad_alignment:
        cmpq $7,%r11
        jbe .Lhandle_7
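        /*
         * More than 7 bytes to set: store one unaligned qword at the
         * start, then bump %rdi to the next 8-byte boundary; the
         * bytes that overlap are simply written again by the aligned
         * code above.
         */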
        movq %rax,(%rdi)        /* unaligned store */
        movq $8,%r8
        subq %r9,%r8
        addq %r8,%rdi
        subq %r8,%r11
        jmp .Lafter_bad_alignment
.Lfinal:
        CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)

/* Some CPUs support enhanced REP MOVSB/STOSB feature.
 * It is recommended to use this when possible.
 *
 * If the enhanced REP MOVSB/STOSB feature is not available, use the fast
 * string instructions.
 *
 * Otherwise, use the original memset function.
 *
 * In the .altinstructions section, the ERMS feature is placed after the
 * REP_GOOD feature to implement the right patch order.
 */
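
/*
 * Each altinstruction_entry records that, when the named CPU feature
 * is present, boot-time alternatives patching replaces the memset
 * body above (length .Lfinal-memset) with the corresponding
 * replacement sequence. Entries are applied in order, so on a CPU
 * with both features the ERMS replacement is patched in last and
 * wins.
 */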
        .section .altinstructions,"a"
        altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
                             .Lfinal-memset,.Lmemset_e-.Lmemset_c
        altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
                             .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
        .previous