Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Headers/amxmovrstransposeintrin.h
213766 views
1
/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
2
*
3
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
* See https://llvm.org/LICENSE.txt for license information.
5
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
*
7
* ===-----------------------------------------------------------------------===
8
*/
9
10
#ifndef __IMMINTRIN_H
11
#error \
12
"Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
13
#endif /* __IMMINTRIN_H */
14
15
#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
16
#define __AMX_MOVRS_TRANSPOSEINTRIN_H
17
#ifdef __x86_64__
18
19
/* Attribute set applied to every inline function defined in this header:
 * always_inline, nodebug, and the "amx-transpose,amx-movrs" target features
 * required by the builtins used below. Undefined again at the end of the
 * header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-transpose,amx-movrs")))
22
23
/* Immediate-form intrinsics. Each macro forwards its three arguments
 * unchanged to the builtin of the same name.
 * NOTE(review): tdst is presumably a compile-time tile-pair index, as in the
 * other AMX immediate-form intrinsics -- confirm against the builtin
 * definitions. */

/* Z0 variant, RS form. */
#define _tile_2rpntlvwz0rs(tdst, base, stride)                                 \
  __builtin_ia32_t2rpntlvwz0rs((tdst), (base), (stride))

/* Z0 variant, RST1 form. */
#define _tile_2rpntlvwz0rst1(tdst, base, stride)                               \
  __builtin_ia32_t2rpntlvwz0rst1((tdst), (base), (stride))

/* Z1 variant, RS form. */
#define _tile_2rpntlvwz1rs(tdst, base, stride)                                 \
  __builtin_ia32_t2rpntlvwz1rs((tdst), (base), (stride))

/* Z1 variant, RST1 form. */
#define _tile_2rpntlvwz1rst1(tdst, base, stride)                               \
  __builtin_ia32_t2rpntlvwz1rst1((tdst), (base), (stride))
31
32
/* Internal helper behind __tile_2rpntlvwz0rs: hands the tile shape (row,
 * col0, col1), both destination tile buffers, the base address and the
 * stride to __builtin_ia32_t2rpntlvwz0rs_internal. */
static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // The destinations are passed as _tile1024i_1024a* to escape the alignment
  // check in clang/test/Headers/x86-intrinsics-headers-clean.cpp
  _tile1024i_1024a *tile0 = (_tile1024i_1024a *)dst0;
  _tile1024i_1024a *tile1 = (_tile1024i_1024a *)dst1;

  __builtin_ia32_t2rpntlvwz0rs_internal(row, col0, col1, tile0, tile1, base,
                                        stride);
}
42
43
/* Internal helper behind __tile_2rpntlvwz0rst1: hands the tile shape (row,
 * col0, col1), both destination tile buffers, the base address and the
 * stride to __builtin_ia32_t2rpntlvwz0rst1_internal. */
static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // The builtin is called with _tile1024i_1024a* destinations.
  _tile1024i_1024a *tile0 = (_tile1024i_1024a *)dst0;
  _tile1024i_1024a *tile1 = (_tile1024i_1024a *)dst1;

  __builtin_ia32_t2rpntlvwz0rst1_internal(row, col0, col1, tile0, tile1, base,
                                          stride);
}
51
52
/* Internal helper behind __tile_2rpntlvwz1rs: hands the tile shape (row,
 * col0, col1), both destination tile buffers, the base address and the
 * stride to __builtin_ia32_t2rpntlvwz1rs_internal. */
static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // The builtin is called with _tile1024i_1024a* destinations.
  _tile1024i_1024a *tile0 = (_tile1024i_1024a *)dst0;
  _tile1024i_1024a *tile1 = (_tile1024i_1024a *)dst1;

  __builtin_ia32_t2rpntlvwz1rs_internal(row, col0, col1, tile0, tile1, base,
                                        stride);
}
60
61
/* Internal helper behind __tile_2rpntlvwz1rst1: hands the tile shape (row,
 * col0, col1), both destination tile buffers, the base address and the
 * stride to __builtin_ia32_t2rpntlvwz1rst1_internal. */
static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // The builtin is called with _tile1024i_1024a* destinations.
  _tile1024i_1024a *tile0 = (_tile1024i_1024a *)dst0;
  _tile1024i_1024a *tile1 = (_tile1024i_1024a *)dst1;

  __builtin_ia32_t2rpntlvwz1rst1_internal(row, col0, col1, tile0, tile1, base,
                                          stride);
}
69
70
/// Converts a pair of tiles from memory into VNNI format, and places the
71
/// results in a pair of destinations specified by dst. The pair of tiles
72
/// in memory is specified via a tsib; the second tile is after the first
73
/// one, separated by the same stride that separates each row.
74
/// The tile configuration for the destination tiles indicates the amount
75
/// of data to read from memory. The instruction will load a number of rows
76
/// that is equal to twice the number of rows in tmm1. The size of each row
77
/// is equal to the average width of the destination tiles. If the second
78
/// tile is configured with zero rows and columns, only the first tile will
79
/// be written.
80
/// Provides a hint to the implementation that the data will likely become
81
/// read shared in the near future and the data caching can be optimized.
82
///
83
/// \headerfile <immintrin.h>
84
///
85
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
86
///
87
/// \param dst0
88
/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
89
/// \param dst1
90
/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
91
/// \param base
92
/// A pointer to base address.
93
/// \param stride
94
/// The stride between the rows' data to be loaded in memory.
95
__DEFAULT_FN_ATTRS
96
static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
97
const void *base, __SIZE_TYPE__ stride) {
98
_tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
99
&dst1->tile, base, stride);
100
}
101
102
/// Converts a pair of tiles from memory into VNNI format, and places the
103
/// results in a pair of destinations specified by dst. The pair of tiles
104
/// in memory is specified via a tsib; the second tile is after the first
105
/// one, separated by the same stride that separates each row.
106
/// The tile configuration for the destination tiles indicates the amount
107
/// of data to read from memory. The instruction will load a number of rows
108
/// that is equal to twice the number of rows in tmm1. The size of each row
109
/// is equal to the average width of the destination tiles. If the second
110
/// tile is configured with zero rows and columns, only the first tile will
111
/// be written.
112
///
113
/// \headerfile <immintrin.h>
114
///
115
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
116
///
117
/// \param dst0
118
/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
119
/// \param dst1
120
/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
121
/// \param base
122
/// A pointer to base address.
123
/// \param stride
124
/// The stride between the rows' data to be loaded in memory.
125
__DEFAULT_FN_ATTRS
126
static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
127
const void *base, __SIZE_TYPE__ stride) {
128
_tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
129
&dst1->tile, base, stride);
130
}
131
132
/// Converts a pair of tiles from memory into VNNI format, and places the
133
/// results in a pair of destinations specified by dst. The pair of tiles
134
/// in memory is specified via a tsib; the second tile is after the first
135
/// one, separated by the same stride that separates each row.
136
/// The tile configuration for the destination tiles indicates the amount
137
/// of data to read from memory. The instruction will load a number of rows
138
/// that is equal to twice the number of rows in tmm1. The size of each row
139
/// is equal to the average width of the destination tiles. If the second
140
/// tile is configured with zero rows and columns, only the first tile will
141
/// be written. The last row will be not be read from memory but instead
142
/// filled with zeros.
143
/// Provides a hint to the implementation that the data will likely become
144
/// read shared in the near future and the data caching can be optimized.
145
///
146
/// \headerfile <immintrin.h>
147
///
148
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
149
///
150
/// \param dst0
151
/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
152
/// \param dst1
153
/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
154
/// \param base
155
/// A pointer to base address.
156
/// \param stride
157
/// The stride between the rows' data to be loaded in memory.
158
__DEFAULT_FN_ATTRS
159
static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
160
const void *base, __SIZE_TYPE__ stride) {
161
_tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
162
&dst1->tile, base, stride);
163
}
164
165
/// Converts a pair of tiles from memory into VNNI format, and places the
166
/// results in a pair of destinations specified by dst. The pair of tiles
167
/// in memory is specified via a tsib; the second tile is after the first
168
/// one, separated by the same stride that separates each row.
169
/// The tile configuration for the destination tiles indicates the amount
170
/// of data to read from memory. The instruction will load a number of rows
171
/// that is equal to twice the number of rows in tmm1. The size of each row
172
/// is equal to the average width of the destination tiles. If the second
173
/// tile is configured with zero rows and columns, only the first tile will
174
/// be written. The last row will be not be read from memory but instead
175
/// filled with zeros.
176
/// Provides a hint to the implementation that the data will likely become
177
/// read shared in the near future and the data caching can be optimized.
178
///
179
/// \headerfile <immintrin.h>
180
///
181
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
182
///
183
/// \param dst0
184
/// First tile of destination tile pair. Max size is 1024i*2 Bytes.
185
/// \param dst1
186
/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.
187
/// \param base
188
/// A pointer to base address.
189
/// \param stride
190
/// The stride between the rows' data to be loaded in memory.
191
__DEFAULT_FN_ATTRS
192
static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
193
const void *base, __SIZE_TYPE__ stride) {
194
_tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
195
&dst1->tile, base, stride);
196
}
197
198
#undef __DEFAULT_FN_ATTRS
199
#endif /* __x86_64__ */
200
#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
201
202