Path: blob/main/contrib/llvm-project/clang/lib/Headers/amxmovrstransposeintrin.h
213766 views
/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===1*2* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3* See https://llvm.org/LICENSE.txt for license information.4* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5*6* ===-----------------------------------------------------------------------===7*/89#ifndef __IMMINTRIN_H10#error \11"Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."12#endif /* __IMMINTRIN_H */1314#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H15#define __AMX_MOVRS_TRANSPOSEINTRIN_H16#ifdef __x86_64__1718#define __DEFAULT_FN_ATTRS \19__attribute__((__always_inline__, __nodebug__, \20__target__("amx-transpose,amx-movrs")))2122#define _tile_2rpntlvwz0rs(tdst, base, stride) \23__builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)24#define _tile_2rpntlvwz0rst1(tdst, base, stride) \25__builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)26#define _tile_2rpntlvwz1rs(tdst, base, stride) \27__builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)28#define _tile_2rpntlvwz1rst1(tdst, base, stride) \29__builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)3031static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(32unsigned short row, unsigned short col0, unsigned short col1,33_tile1024i *dst0, _tile1024i *dst1, const void *base,34__SIZE_TYPE__ stride) {35// Use __tile1024i_1024a* to escape the alignment check in36// clang/test/Headers/x86-intrinsics-headers-clean.cpp37__builtin_ia32_t2rpntlvwz0rs_internal(38row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,39(__SIZE_TYPE__)(stride));40}4142static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(43unsigned short row, unsigned short col0, unsigned short col1,44_tile1024i *dst0, _tile1024i *dst1, const void *base,45__SIZE_TYPE__ stride) {46__builtin_ia32_t2rpntlvwz0rst1_internal(47row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,48(__SIZE_TYPE__)(stride));49}5051static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(52unsigned short row, unsigned short col0, unsigned short col1,53_tile1024i *dst0, _tile1024i *dst1, const void *base,54__SIZE_TYPE__ stride) {55__builtin_ia32_t2rpntlvwz1rs_internal(56row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,57(__SIZE_TYPE__)(stride));58}5960static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(61unsigned short row, unsigned short col0, unsigned short col1,62_tile1024i *dst0, _tile1024i *dst1, const void *base,63__SIZE_TYPE__ stride) {64__builtin_ia32_t2rpntlvwz1rst1_internal(65row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,66(__SIZE_TYPE__)(stride));67}6869/// Converts a pair of tiles from memory into VNNI format, and places the70/// results in a pair of destinations specified by dst. The pair of tiles71/// in memory is specified via a tsib; the second tile is after the first72/// one, separated by the same stride that separates each row.73/// The tile configuration for the destination tiles indicates the amount74/// of data to read from memory. The instruction will load a number of rows75/// that is equal to twice the number of rows in tmm1. The size of each row76/// is equal to the average width of the destination tiles. If the second77/// tile is configured with zero rows and columns, only the first tile will78/// be written.79/// Provides a hint to the implementation that the data will likely become80/// read shared in the near future and the data caching can be optimized.81///82/// \headerfile <immintrin.h>83///84/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.85///86/// \param dst087/// First tile of destination tile pair. Max size is 1024i*2 Bytes.88/// \param dst189/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.90/// \param base91/// A pointer to base address.92/// \param stride93/// The stride between the rows' data to be loaded in memory.94__DEFAULT_FN_ATTRS95static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,96const void *base, __SIZE_TYPE__ stride) {97_tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,98&dst1->tile, base, stride);99}100101/// Converts a pair of tiles from memory into VNNI format, and places the102/// results in a pair of destinations specified by dst. The pair of tiles103/// in memory is specified via a tsib; the second tile is after the first104/// one, separated by the same stride that separates each row.105/// The tile configuration for the destination tiles indicates the amount106/// of data to read from memory. The instruction will load a number of rows107/// that is equal to twice the number of rows in tmm1. The size of each row108/// is equal to the average width of the destination tiles. If the second109/// tile is configured with zero rows and columns, only the first tile will110/// be written.111///112/// \headerfile <immintrin.h>113///114/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.115///116/// \param dst0117/// First tile of destination tile pair. Max size is 1024i*2 Bytes.118/// \param dst1119/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.120/// \param base121/// A pointer to base address.122/// \param stride123/// The stride between the rows' data to be loaded in memory.124__DEFAULT_FN_ATTRS125static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,126const void *base, __SIZE_TYPE__ stride) {127_tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,128&dst1->tile, base, stride);129}130131/// Converts a pair of tiles from memory into VNNI format, and places the132/// results in a pair of destinations specified by dst. The pair of tiles133/// in memory is specified via a tsib; the second tile is after the first134/// one, separated by the same stride that separates each row.135/// The tile configuration for the destination tiles indicates the amount136/// of data to read from memory. The instruction will load a number of rows137/// that is equal to twice the number of rows in tmm1. The size of each row138/// is equal to the average width of the destination tiles. If the second139/// tile is configured with zero rows and columns, only the first tile will140/// be written. The last row will be not be read from memory but instead141/// filled with zeros.142/// Provides a hint to the implementation that the data will likely become143/// read shared in the near future and the data caching can be optimized.144///145/// \headerfile <immintrin.h>146///147/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.148///149/// \param dst0150/// First tile of destination tile pair. Max size is 1024i*2 Bytes.151/// \param dst1152/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.153/// \param base154/// A pointer to base address.155/// \param stride156/// The stride between the rows' data to be loaded in memory.157__DEFAULT_FN_ATTRS158static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,159const void *base, __SIZE_TYPE__ stride) {160_tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,161&dst1->tile, base, stride);162}163164/// Converts a pair of tiles from memory into VNNI format, and places the165/// results in a pair of destinations specified by dst. The pair of tiles166/// in memory is specified via a tsib; the second tile is after the first167/// one, separated by the same stride that separates each row.168/// The tile configuration for the destination tiles indicates the amount169/// of data to read from memory. The instruction will load a number of rows170/// that is equal to twice the number of rows in tmm1. The size of each row171/// is equal to the average width of the destination tiles. If the second172/// tile is configured with zero rows and columns, only the first tile will173/// be written. The last row will be not be read from memory but instead174/// filled with zeros.175/// Provides a hint to the implementation that the data will likely become176/// read shared in the near future and the data caching can be optimized.177///178/// \headerfile <immintrin.h>179///180/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.181///182/// \param dst0183/// First tile of destination tile pair. Max size is 1024i*2 Bytes.184/// \param dst1185/// Second tile of destination tile pair. Max size is 1024i*2 Bytes.186/// \param base187/// A pointer to base address.188/// \param stride189/// The stride between the rows' data to be loaded in memory.190__DEFAULT_FN_ATTRS191static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,192const void *base, __SIZE_TYPE__ stride) {193_tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,194&dst1->tile, base, stride);195}196197#undef __DEFAULT_FN_ATTRS198#endif /* __x86_64__ */199#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */200201202