GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/memcpy_64.S
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

.section .noinstr.text, "ax"
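
/*
 * memcpy and its slow path below live in .noinstr.text: they may run in
 * non-instrumentable contexts (such as early entry code), so this file
 * must stay free of tracing and other instrumentation hooks.
 */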
/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 *
 * The FSRM alternative should be done inline (avoiding the call and
 * the disgusting return handling), but that would require some help
 * from the compiler for better calling conventions.
 *
 * The 'rep movsb' itself is small enough to replace the call, but the
 * two register moves blow up the code. And one of them is "needed"
 * only for the return value that is the same as the destination input,
 * which the compiler could/should do much better anyway.
 */
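/*
 * In C terms this implements the usual contract (the kernel prototype
 * is declared via the string headers):
 *
 *	void *memcpy(void *dest, const void *src, size_t count);
 *
 * with the return value being 'dest' itself.
 */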
SYM_TYPED_FUNC_START(__memcpy)
	ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
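	/*
	 * On CPUs with FSRM (Fast Short REP MOVSB) the alternatives code
	 * patches the 'jmp memcpy_orig' above out to NOPs at boot, so
	 * execution falls through to this fast path: save the return
	 * value (the original destination), move the count into %rcx and
	 * let 'rep movsb' copy %rcx bytes from (%rsi) to (%rdi).
	 */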
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	RET
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)

SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
SYM_PIC_ALIAS(memcpy)
EXPORT_SYMBOL(memcpy)
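/*
 * 'memcpy' is an alias of '__memcpy', so both names resolve to the code
 * above; the PIC alias keeps references from position-independent
 * (early boot) code working, and EXPORT_SYMBOL makes both names
 * available to loadable modules.
 */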

SYM_FUNC_START_LOCAL(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail
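	/* Copies shorter than 0x20 (32) bytes go straight to the tail handler. */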

	/*
	 * We check whether a memory false dependence could occur, then
	 * jump to the corresponding copy mode.
	 */
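	/*
	 * Only the low address bytes (%sil, %dil) are compared: the
	 * hardware disambiguates loads against earlier stores using
	 * partial address bits, so this is a cheap aliasing heuristic
	 * rather than a full overlap check. When it triggers, the copy
	 * runs backward from the end of the buffers instead.
	 */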
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
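	/*
	 * The count is biased down by 0x20 so that the 'subq' at the top
	 * of the loop borrows (sets CF) on the final iteration. 'jae'
	 * keeps looping while CF is clear; the movq/leaq instructions in
	 * between do not touch the flags. The 'addl' after the loop
	 * undoes the bias, leaving 0..0x1f bytes for the tail.
	 */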
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate the copy position to start from the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
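	/*
	 * %rsi and %rdi now point one past the end of the buffers; the
	 * loop below walks backward with negative offsets, using the
	 * same biased count and CF-based 'jae' exit as the forward loop.
	 */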
	/*
	 * At most 3 ALU operations issue in one cycle, so align the
	 * loop to the next 16-byte boundary with NOPs.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Undo the count bias and calculate the copy position back at
	 * the head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes
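	/*
	 * The tail cases below cascade by size (16..31, 8..15, 4..7 and
	 * 1..3 bytes); each covers its whole range with a few overlapping
	 * loads and stores from both ends instead of a byte loop.
	 */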

	/*
	 * Copy from 16 up to 31 bytes.
	 */
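	/*
	 * Two 8-byte loads from the start and two from the end: for any
	 * count in [16, 31] the windows overlap, so the four stores
	 * cover every byte without a loop.
	 */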
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	RET
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Copy from 8 up to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	RET
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Copy from 4 up to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	RET
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Copy from 1 up to 3 bytes.
	 */
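	/*
	 * 'movzbl' does not modify the flags, so the 'jz' below still
	 * tests the 'subl' above: a zero count already exited via 'jb',
	 * and 'jz' fires when the original count was exactly 1. For 2 or
	 * 3 bytes, the decremented count in %rdx indexes the last byte.
	 */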
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	RET
SYM_FUNC_END(memcpy_orig)