/* Path: blob/main/tools/test/xregs_sig/c2x2c_amd64.S -- 39536 views (web-viewer metadata, not program text) */
/*
 * This file is in public domain.
 * Written by Dmitry Chagin <[email protected]>
 */

/*
 * Helpers that copy the SSE (XMM) and AVX (YMM) register files to and from
 * memory, plus a CPU feature probe.
 *
 * Syntax: GNU as, AT&T operand order; the file is run through the C
 *         preprocessor (it relies on #if / #define).
 * ABI:    the buffer pointer is taken from %rdi and results are returned in
 *         %eax, i.e. the System V AMD64 convention for a first pointer
 *         argument and an integer return value.
 */

#if defined(__FreeBSD__)
#include <machine/specialreg.h>
#else
/* CPUID leaf 1, %ecx feature bits. */
#define	CPUID2_OSXSAVE		0x08000000
#define	CPUID2_AVX		0x10000000
/* XCR0 state-component enable bits (read with xgetbv, %ecx = 0). */
#define	XFEATURE_ENABLED_X87	0x00000001
#define	XFEATURE_ENABLED_SSE	0x00000002
#define	XFEATURE_ENABLED_AVX	0x00000004
#define	XFEATURE_AVX \
    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif

	.text

/*
 * xregs_banks_max
 *
 * In:    nothing
 * Out:   %eax = 1 when CPUID reports both AVX and OSXSAVE and XCR0 has the
 *               x87, SSE and AVX state bits all set; 0 otherwise.
 *        NOTE(review): presumably this is the index of the highest usable
 *        register bank (0 = XMM only, 1 = YMM) -- confirm against the caller.
 * Clobb: %ecx, %edx, flags.  %rbx is saved and restored because cpuid
 *        overwrites %ebx.
 */
	.globl	xregs_banks_max
	.type	xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx
	movl	$1, %eax			/* CPUID leaf 1: feature flags */
	cpuid
	/* Both bits are required: xgetbv is only usable with OSXSAVE set. */
	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	jne	.Lno_avx
	xorl	%ecx, %ecx			/* select XCR0 */
	xgetbv					/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax
	jne	.Lno_avx
	movl	$1, %eax
	jmp	.Ldone
.Lno_avx:
	xorl	%eax, %eax
.Ldone:
	popq	%rbx
	retq

	.size	xregs_banks_max, . - xregs_banks_max


/*
 * cpu_to_xmm
 *
 * Stores %xmm0..%xmm15 to memory, register N at byte offset N * 16.
 *
 * In:    %rdi = destination buffer, at least 16 * 16 = 256 bytes; no
 *               alignment is required (movdqu).
 * Out:   nothing
 * Clobb: none (registers are only read).
 */
	.globl	cpu_to_xmm
	.type	cpu_to_xmm, @function
cpu_to_xmm:
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 1 * 16(%rdi)
	movdqu	%xmm2, 2 * 16(%rdi)
	movdqu	%xmm3, 3 * 16(%rdi)
	movdqu	%xmm4, 4 * 16(%rdi)
	movdqu	%xmm5, 5 * 16(%rdi)
	movdqu	%xmm6, 6 * 16(%rdi)
	movdqu	%xmm7, 7 * 16(%rdi)
	movdqu	%xmm8, 8 * 16(%rdi)
	movdqu	%xmm9, 9 * 16(%rdi)
	movdqu	%xmm10, 10 * 16(%rdi)
	movdqu	%xmm11, 11 * 16(%rdi)
	movdqu	%xmm12, 12 * 16(%rdi)
	movdqu	%xmm13, 13 * 16(%rdi)
	movdqu	%xmm14, 14 * 16(%rdi)
	movdqu	%xmm15, 15 * 16(%rdi)
	retq

	.size	cpu_to_xmm, . - cpu_to_xmm


/*
 * xmm_to_cpu
 *
 * Loads %xmm0..%xmm15 from memory, register N from byte offset N * 16.
 * This is the inverse of cpu_to_xmm.
 *
 * In:    %rdi = source buffer, at least 16 * 16 = 256 bytes; no alignment
 *               is required (movdqu).
 * Out:   nothing
 * Clobb: %xmm0..%xmm15 -- overwriting them is the purpose of the routine.
 */
	.globl	xmm_to_cpu
	.type	xmm_to_cpu, @function
xmm_to_cpu:
	movdqu	(%rdi), %xmm0
	movdqu	1 * 16(%rdi), %xmm1
	movdqu	2 * 16(%rdi), %xmm2
	movdqu	3 * 16(%rdi), %xmm3
	movdqu	4 * 16(%rdi), %xmm4
	movdqu	5 * 16(%rdi), %xmm5
	movdqu	6 * 16(%rdi), %xmm6
	movdqu	7 * 16(%rdi), %xmm7
	movdqu	8 * 16(%rdi), %xmm8
	movdqu	9 * 16(%rdi), %xmm9
	movdqu	10 * 16(%rdi), %xmm10
	movdqu	11 * 16(%rdi), %xmm11
	movdqu	12 * 16(%rdi), %xmm12
	movdqu	13 * 16(%rdi), %xmm13
	movdqu	14 * 16(%rdi), %xmm14
	movdqu	15 * 16(%rdi), %xmm15
	retq

	.size	xmm_to_cpu, . - xmm_to_cpu


/*
 * cpu_to_avx
 *
 * Stores %ymm0..%ymm15 to memory, register N at byte offset N * 32.
 * Uses VEX-encoded instructions, so it must only be called when AVX is
 * usable (see xregs_banks_max).
 *
 * In:    %rdi = destination buffer, at least 16 * 32 = 512 bytes; no
 *               alignment is required (vmovdqu).
 * Out:   nothing
 * Clobb: none (registers are only read).
 */
	.globl	cpu_to_avx
	.type	cpu_to_avx, @function
cpu_to_avx:
	vmovdqu	%ymm0, (%rdi)
	vmovdqu	%ymm1, 1 * 32(%rdi)
	vmovdqu	%ymm2, 2 * 32(%rdi)
	vmovdqu	%ymm3, 3 * 32(%rdi)
	vmovdqu	%ymm4, 4 * 32(%rdi)
	vmovdqu	%ymm5, 5 * 32(%rdi)
	vmovdqu	%ymm6, 6 * 32(%rdi)
	vmovdqu	%ymm7, 7 * 32(%rdi)
	vmovdqu	%ymm8, 8 * 32(%rdi)
	vmovdqu	%ymm9, 9 * 32(%rdi)
	vmovdqu	%ymm10, 10 * 32(%rdi)
	vmovdqu	%ymm11, 11 * 32(%rdi)
	vmovdqu	%ymm12, 12 * 32(%rdi)
	vmovdqu	%ymm13, 13 * 32(%rdi)
	vmovdqu	%ymm14, 14 * 32(%rdi)
	vmovdqu	%ymm15, 15 * 32(%rdi)
	retq

	.size	cpu_to_avx, . - cpu_to_avx


/*
 * avx_to_cpu
 *
 * Loads %ymm0..%ymm15 from memory, register N from byte offset N * 32.
 * This is the inverse of cpu_to_avx.  Uses VEX-encoded instructions, so it
 * must only be called when AVX is usable (see xregs_banks_max).
 *
 * In:    %rdi = source buffer, at least 16 * 32 = 512 bytes; no alignment
 *               is required (vmovdqu).
 * Out:   nothing
 * Clobb: %ymm0..%ymm15 -- overwriting them is the purpose of the routine.
 *
 * NOTE(review): no vzeroupper is issued before returning; presumably that
 * is deliberate, since it would discard the upper halves just loaded --
 * confirm against the caller.
 */
	.globl	avx_to_cpu
	.type	avx_to_cpu, @function
avx_to_cpu:
	vmovdqu	(%rdi), %ymm0
	vmovdqu	1 * 32(%rdi), %ymm1
	vmovdqu	2 * 32(%rdi), %ymm2
	vmovdqu	3 * 32(%rdi), %ymm3
	vmovdqu	4 * 32(%rdi), %ymm4
	vmovdqu	5 * 32(%rdi), %ymm5
	vmovdqu	6 * 32(%rdi), %ymm6
	vmovdqu	7 * 32(%rdi), %ymm7
	vmovdqu	8 * 32(%rdi), %ymm8
	vmovdqu	9 * 32(%rdi), %ymm9
	vmovdqu	10 * 32(%rdi), %ymm10
	vmovdqu	11 * 32(%rdi), %ymm11
	vmovdqu	12 * 32(%rdi), %ymm12
	vmovdqu	13 * 32(%rdi), %ymm13
	vmovdqu	14 * 32(%rdi), %ymm14
	vmovdqu	15 * 32(%rdi), %ymm15
	retq

	.size	avx_to_cpu, . - avx_to_cpu

/* Empty .note.GNU-stack section: this object does not need an executable stack. */
	.section	.note.GNU-stack,"",@progbits