Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/sparc/lib/NG4memset.S
26424 views
/* SPDX-License-Identifier: GPL-2.0 */
/* NG4memset.S: Niagara-4 optimized memset/bzero.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#include <asm/asi.h>
/* %g2 and %g3 are used as plain temporaries in this file; declare them
 * as scratch registers to the assembler.
 */
	.register	%g2, #scratch
	.register	%g3, #scratch

/*
 * NG4memset - fill a memory range with a byte value.
 *
 * In:   %o0 = destination
 *       %o1 = fill value (only the low 8 bits are used)
 *       %o2 = length in bytes
 * Uses: %g1, %o1, %o2, %o4 and the condition codes here, plus whatever
 *       the shared store path in NG4bzero uses.
 *
 * This routine only prepares registers: the length is moved into %o1
 * and the fill byte is replicated into all eight bytes of %o4.  It
 * then branches forward to the first numeric label "1:" that follows,
 * which lives inside NG4bzero and performs the actual stores.  That
 * label must therefore remain the first "1:" after this function.
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the branch above it.  None of the branches here use the annul bit,
 * so both delay slots always execute.
 */
	.text
	.align		32
	.globl		NG4memset
NG4memset:
	andcc		%o1, 0xff, %o4		/* %o4 = fill byte; Z set if it is 0 */
	be,pt		%icc, 1f		/* zero fill: %o4 is already the pattern */
	 mov		%o2, %o1		/* both paths: %o1 = length */
	sllx		%o4, 8, %g1
	or		%g1, %o4, %o2		/* fill byte in the low 2 bytes */
	sllx		%o2, 16, %g1
	or		%g1, %o2, %o2		/* fill byte in the low 4 bytes */
	sllx		%o2, 32, %g1
	ba,pt		%icc, 1f
	 or		%g1, %o2, %o4		/* %o4 = fill byte in all 8 bytes */
	.size		NG4memset,.-NG4memset

/*
 * NG4bzero - zero a memory range.  From the first "1:" label onwards
 * this is also the store path NG4memset branches into.
 *
 * In:   %o0 = destination
 *       %o1 = length in bytes
 *       %o4 = 64-bit fill pattern (only when entered at the first
 *             "1:" label; NG4bzero itself clears it)
 * Out:  %o0 = original destination (kept in %o3 meanwhile)
 * Uses: %g1, %g2, %g3, %o1, %o3, %o4, %o5, condition codes
 *
 * Strategy:
 *   - length <= 16: byte loop (.Ltiny).
 *   - otherwise store bytes up to the next 8-byte boundary.
 *   - remaining <= 120: 8-byte stores (.Lmedium), then at most one
 *     4-byte store, then the byte loop.
 *   - remaining > 120: 8-byte stores up to the next 64-byte boundary,
 *     then 64 bytes per iteration with stxa to
 *     ASI_BLK_INIT_QUAD_LDD_P, then the tail through
 *     .Lmedium / .Ltiny.
 *
 * The length is a 64-bit quantity, so every condition-code branch in
 * this routine tests %xcc.  Testing %icc looks at the low 32 bits
 * only: a length that is a multiple of 4GB would then compare equal to
 * zero and nothing would be stored, and other lengths of 4GB or more
 * would be cut short.
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the branch above it.  Unless the branch carries ",a" the delay slot
 * executes on both the taken and the fall-through path.
 */
	.align		32
	.globl		NG4bzero
NG4bzero:
	clr		%o4			/* fill pattern = 0 */
1:	cmp		%o1, 16			/* NG4memset enters here */
	ble		%xcc, .Ltiny
	 mov		%o0, %o3		/* both paths: save return value */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1		/* %g1 = bytes up to 8-byte boundary */
	brz,pt		%g1, .Laligned8
	 sub		%o1, %g1, %o1		/* both paths: length -= %g1 */
1:	stb		%o4, [%o0 + 0x00]
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0
.Laligned8:
	cmp		%o1, 64 + (64 - 8)
	ble		%xcc, .Lmedium
	 sub		%g0, %o0, %g1		/* .Lmedium recomputes %g1 itself */
	andcc		%g1, (64 - 1), %g1	/* %g1 = bytes up to 64-byte boundary */
	brz,pn		%g1, .Laligned64
	 sub		%o1, %g1, %o1		/* both paths: length -= %g1 */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x8, %o0
.Laligned64:
	andn		%o1, 64 - 1, %g1	/* %g1 = bytes for the 64-byte loop */
	sub		%o1, %g1, %o1		/* %o1 = tail, less than 64 */
	brnz,pn		%o4, .Lnon_bzero_loop
	 mov		0x20, %g2		/* both paths: second-half offset */
	/* Zero fill: only two stores are issued per 64 bytes, at +0x00
	 * and +0x20.  NOTE(review): this presumably relies on the
	 * block-init ASI zero-filling the rest of each 32-byte half;
	 * confirm against the CPU documentation.
	 */
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b
	 add		%o0, 0x40, %o0
.Lpostloop:
	cmp		%o1, 8
	bl,pn		%xcc, .Ltiny
	 membar		#StoreStore|#StoreLoad	/* both paths: runs after the stxa loops */
.Lmedium:
	andn		%o1, 0x7, %g1		/* %g1 = bytes to store as 8-byte words */
	sub		%o1, %g1, %o1		/* %o1 = tail, less than 8 */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 0x8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x08, %o0
	andcc		%o1, 0x4, %g1		/* %g1 = 4 if a 4-byte store is needed */
	be,pt		%xcc, .Ltiny
	 sub		%o1, %g1, %o1		/* both paths: subtracts 0 or 4 */
	stw		%o4, [%o0 + 0x00]
	add		%o0, 0x4, %o0
.Ltiny:
	cmp		%o1, 0
	be,pn		%xcc, .Lexit
	/* The loop head below doubles as the delay slot of the branch
	 * above; when that branch is taken the extra decrement of %o1
	 * is harmless because %o1 is not used again.
	 */
1:	 subcc		%o1, 1, %o1
	stb		%o4, [%o0 + 0x00]
	bne,pt		%xcc, 1b		/* tests the flags set by subcc */
	 add		%o0, 1, %o0
.Lexit:
	retl
	 mov		%o3, %o0		/* return the original destination */
.Lnon_bzero_loop:
	/* Non-zero fill: all eight 8-byte words of each 64-byte block
	 * are stored explicitly.  Offsets used: %g0 = 0x00, %g2 = 0x20,
	 * %g3 = 0x08, %o5 = 0x28, first relative to the block start and
	 * then relative to block start + 0x10.
	 */
	mov		0x08, %g3
	mov		0x28, %o5
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1		/* flags consumed by the bne below */
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x10, %o0
	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b
	 add		%o0, 0x30, %o0		/* 0x10 + 0x30 = one 64-byte block */
	ba,a,pt		%xcc, .Lpostloop	/* annulled: the nop is not executed */
	 nop
	.size		NG4bzero,.-NG4bzero