GitHub Repository: torvalds/linux
Path: blob/master/tools/arch/x86/lib/memcpy_64.S
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

.section .noinstr.text, "ax"

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 *
 * The FSRM alternative should be done inline (avoiding the call and
 * the disgusting return handling), but that would require some help
 * from the compiler for better calling conventions.
 *
 * The 'rep movsb' itself is small enough to replace the call, but the
 * two register moves blow up the code. And one of them is "needed"
 * only for the return value that is the same as the source input,
 * which the compiler could/should do much better anyway.
 */
SYM_TYPED_FUNC_START(__memcpy)
	ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM

	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	RET
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
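
/*
 * A minimal C sketch of what the FSRM fast path above amounts to,
 * assuming the usual memcpy contract; it is illustrative only and the
 * name fsrm_memcpy_sketch is made up. On FSRM hardware the whole copy
 * is a single 'rep movsb' with %rdi/%rsi/%rcx as dst/src/count, and
 * the original destination is returned in %rax.
 *
 *	#include <stddef.h>
 *
 *	static void *fsrm_memcpy_sketch(void *dst, const void *src, size_t count)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		while (count--)		// the hardware does this as one 'rep movsb'
 *			*d++ = *s++;
 *		return dst;		// memcpy returns the original destination
 *	}
 */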

SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
SYM_PIC_ALIAS(memcpy)
EXPORT_SYMBOL(memcpy)

SYM_FUNC_START_LOCAL(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail
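
/*
 * A minimal C sketch, illustrative only, of the forward path above:
 * memcpy_orig copies 32 bytes per iteration while at least 32 bytes
 * remain and leaves the rest for .Lhandle_tail. The helper name
 * copy_forward_sketch is made up; the 8-byte memcpy() calls stand in
 * for the unaligned movq loads and stores.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static size_t copy_forward_sketch(unsigned char *d,
 *					  const unsigned char *s, size_t count)
 *	{
 *		while (count >= 32) {
 *			uint64_t r0, r1, r2, r3;
 *
 *			memcpy(&r0, s +  0, 8);	// movq 0*8(%rsi), %r8
 *			memcpy(&r1, s +  8, 8);	// movq 1*8(%rsi), %r9
 *			memcpy(&r2, s + 16, 8);	// movq 2*8(%rsi), %r10
 *			memcpy(&r3, s + 24, 8);	// movq 3*8(%rsi), %r11
 *			memcpy(d +  0, &r0, 8);
 *			memcpy(d +  8, &r1, 8);
 *			memcpy(d + 16, &r2, 8);
 *			memcpy(d + 24, &r3, 8);
 *			s += 32;
 *			d += 32;
 *			count -= 32;
 *		}
 *		return count;		// < 32 bytes left for the tail code
 *	}
 */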

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPs in the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
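
/*
 * A minimal C sketch, illustrative only, of the backward path above.
 * Copying from the top down when the source's low address byte
 * compares below the destination's avoids memory false dependences
 * between the loads and earlier stores. The name copy_backward_sketch
 * is made up; as in the assembly, the remaining tail (< 32 bytes) is
 * then copied from the front by the code below.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static size_t copy_backward_sketch(unsigned char *d,
 *					   const unsigned char *s, size_t count)
 *	{
 *		unsigned char *dp = d + count;		// one past the last byte
 *		const unsigned char *sp = s + count;
 *
 *		while (count >= 32) {
 *			uint64_t r0, r1, r2, r3;
 *
 *			memcpy(&r0, sp -  8, 8);	// movq -1*8(%rsi), %r8
 *			memcpy(&r1, sp - 16, 8);
 *			memcpy(&r2, sp - 24, 8);
 *			memcpy(&r3, sp - 32, 8);
 *			memcpy(dp -  8, &r0, 8);
 *			memcpy(dp - 16, &r1, 8);
 *			memcpy(dp - 24, &r2, 8);
 *			memcpy(dp - 32, &r3, 8);
 *			sp -= 32;
 *			dp -= 32;
 *			count -= 32;
 *		}
 *		return count;		// tail handled from the front
 *	}
 */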
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	RET
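
/*
 * A minimal C sketch, illustrative only, of the 16-31 byte case above:
 * load 16 bytes from the head and 16 bytes ending at the last byte,
 * then store both windows. They may overlap, which is harmless because
 * every load happens before any store. copy_tail_16_31_sketch is a
 * made-up name.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void copy_tail_16_31_sketch(unsigned char *d,
 *					   const unsigned char *s, size_t count)
 *	{
 *		uint64_t h0, h1, t0, t1;	// 16 <= count <= 31
 *
 *		memcpy(&h0, s, 8);
 *		memcpy(&h1, s + 8, 8);
 *		memcpy(&t0, s + count - 16, 8);	// movq -2*8(%rsi, %rdx), %r10
 *		memcpy(&t1, s + count - 8, 8);	// movq -1*8(%rsi, %rdx), %r11
 *		memcpy(d, &h0, 8);
 *		memcpy(d + 8, &h1, 8);
 *		memcpy(d + count - 16, &t0, 8);
 *		memcpy(d + count - 8, &t1, 8);
 *	}
 */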
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	RET
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	RET
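
/*
 * The 8-15 and 4-7 byte cases above use the same head/tail trick with
 * a narrower window. A minimal C sketch, illustrative only, with a
 * made-up name; width is 8 for counts in [8, 15] and 4 for counts in
 * [4, 7], so the two stores overlap whenever count < 2 * width.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	static void copy_tail_small_sketch(unsigned char *d,
 *					   const unsigned char *s,
 *					   size_t count, size_t width)
 *	{
 *		unsigned char head[8], tail[8];
 *
 *		memcpy(head, s, width);			// load from the front
 *		memcpy(tail, s + count - width, width);	// load ending at the last byte
 *		memcpy(d, head, width);
 *		memcpy(d + count - width, tail, width);
 *	}
 */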
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Move data from 1 to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	RET
SYM_FUNC_END(memcpy_orig)
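
/*
 * A minimal C sketch, illustrative only, of the 1-3 byte case above
 * (the 'subl $1, %edx' has already ruled out count == 0). The first
 * byte is always stored; for 2 or 3 bytes, byte 1 and the last byte
 * are stored as well, and they coincide when count == 2. The name
 * copy_tail_1_3_sketch is made up.
 *
 *	#include <stddef.h>
 *
 *	static void copy_tail_1_3_sketch(unsigned char *d,
 *					 const unsigned char *s, size_t count)
 *	{
 *		unsigned char first = s[0];	// movzbl (%rsi), %ecx
 *
 *		if (count > 1) {
 *			unsigned char second = s[1];		// movzbq 1(%rsi), %r8
 *			unsigned char last = s[count - 1];	// movzbq (%rsi, %rdx), %r9
 *
 *			d[1] = second;
 *			d[count - 1] = last;
 *		}
 *		d[0] = first;			// movb %cl, (%rdi)
 *	}
 */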