/* Mirror of https://github.com/ioacademy-jikim/debugging, synced 2025-06-10 01:16:12 +00:00. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"

typedef unsigned char UChar;
typedef unsigned int UInt;
typedef unsigned long int UWord;
typedef unsigned long long int ULong;

#if defined(VGO_darwin)
UChar randArray[1027] __attribute__((used));
#else
UChar _randArray[1027] __attribute__((used));
#endif

#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

typedef union { UChar u8[32]; UInt u32[8]; } YMM;

typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;

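/* Layout note (derived from the struct above): the four YMM fields sit at
   byte offsets 0, 32, 64 and 96, and u64 at offset 128, which is what the
   fixed offsets in the asm templates below ("vmovdqa 0(%0)" ... "movq
   128(%0)") rely on.  memalign32 from tests/malloc.h is expected to supply
   the 32-byte alignment that the aligned vmovdqa accesses require. */
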
void showYMM ( YMM* vec )
{
   int i;
   assert(IS_32_ALIGNED(vec));
   for (i = 31; i >= 0; i--) {
      printf("%02x", (UInt)vec->u8[i]);
      if (i > 0 && 0 == ((i+0) & 7)) printf(".");
   }
}

void showBlock ( char* msg, Block* block )
{
   printf(" %s\n", msg);
   printf(" "); showYMM(&block->a1); printf("\n");
   printf(" "); showYMM(&block->a2); printf("\n");
   printf(" "); showYMM(&block->a3); printf("\n");
   printf(" "); showYMM(&block->a4); printf("\n");
   printf(" %016llx\n", block->u64);
}

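/* randUChar below is a tiny linear congruential generator (the familiar
   1103515245 * seed + 12345 recurrence) started from a fixed seed, so the
   byte stream, and hence every "before" block printed by the tests, is the
   same on every run.  randBlock uses it to fill a whole Block. */
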
UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

void randBlock ( Block* b )
{
   int i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}

/* Generate a function test_NAME that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands, only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention, as operands, only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants. */

#define GEN_test_RandM(_name, _reg_form, _mem_form) \
   \
   __attribute__ ((noinline)) static void test_##_name ( void ) \
   { \
      Block* b = memalign32(sizeof(Block)); \
      randBlock(b); \
      printf("%s(reg)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm6" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _reg_form "\n\t" \
         "vmovdqa %%ymm7, 0(%0)" "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b); \
      randBlock(b); \
      printf("%s(mem)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "leaq 0(%0),%%rax" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm7" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _mem_form "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm7, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm6", \
           "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
      showBlock("after", b); \
      printf("\n"); \
      free(b); \
   }

#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)

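/* GEN_test_Ronly and GEN_test_Monly just reuse GEN_test_RandM with an empty
   string for the form that is not being tested; for that form the asm block
   merely reloads and writes back the registers, so its "before" and "after"
   dumps come out identical. */
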
/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2. */

GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDB_256,
               "vpaddb %%ymm6, %%ymm8, %%ymm7",
               "vpaddb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHDQ_256,
               "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXDQ_256,
               "vpmovsxdq %%xmm6, %%ymm8",
               "vpmovsxdq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWD_256,
               "vpmovsxwd %%xmm6, %%ymm8",
               "vpmovsxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_256,
               "vpmulhw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHQDQ_256,
               "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRAW_0x05_256,
               "vpsraw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPCMPGTB_256,
               "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTW_256,
               "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTD_256,
               "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBD_256,
               "vpmovzxbd %%xmm6, %%ymm8",
               "vpmovzxbd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXBD_256,
               "vpmovsxbd %%xmm6, %%ymm8",
               "vpmovsxbd (%%rax), %%ymm8")

GEN_test_RandM(VPALIGNR_256_1of3,
               "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_2of3,
               "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_3of3,
               "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x00,
               "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xFE,
               "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x30,
               "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x21,
               "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xD7,
               "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xB5,
               "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x85,
               "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x29,
               "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSLLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAW_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")

/* Instructions new in AVX2. */

GEN_test_Monly(VBROADCASTI128,
               "vbroadcasti128 (%%rax), %%ymm9")

GEN_test_RandM(VEXTRACTI128_0x0,
               "vextracti128 $0x0, %%ymm7, %%xmm9",
               "vextracti128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTI128_0x1,
               "vextracti128 $0x1, %%ymm7, %%xmm9",
               "vextracti128 $0x1, %%ymm7, (%%rax)")

GEN_test_RandM(VINSERTI128_0x0,
               "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTI128_0x1,
               "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VPERM2I128_0x00,
               "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0xFF,
               "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x30,
               "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x21,
               "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x12,
               "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x03,
               "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x85,
               "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x5A,
               "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VBROADCASTSS_128,
               "vbroadcastss %%xmm9, %%xmm7")

GEN_test_Ronly(VBROADCASTSS_256,
               "vbroadcastss %%xmm9, %%ymm7")

GEN_test_Ronly(VBROADCASTSD_256,
               "vbroadcastsd %%xmm9, %%ymm7")

GEN_test_RandM(VPERMD,
               "vpermd %%ymm6, %%ymm7, %%ymm9",
               "vpermd (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMQ_0x00,
               "vpermq $0x00, %%ymm6, %%ymm7",
               "vpermq $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xFE,
               "vpermq $0xFE, %%ymm6, %%ymm7",
               "vpermq $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x30,
               "vpermq $0x30, %%ymm6, %%ymm7",
               "vpermq $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x21,
               "vpermq $0x21, %%ymm6, %%ymm7",
               "vpermq $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xD7,
               "vpermq $0xD7, %%ymm6, %%ymm7",
               "vpermq $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xB5,
               "vpermq $0xB5, %%ymm6, %%ymm7",
               "vpermq $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x85,
               "vpermq $0x85, %%ymm6, %%ymm7",
               "vpermq $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x29,
               "vpermq $0x29, %%ymm6, %%ymm7",
               "vpermq $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPS,
               "vpermps %%ymm6, %%ymm7, %%ymm9",
               "vpermps (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMPD_0x00,
               "vpermpd $0x00, %%ymm6, %%ymm7",
               "vpermpd $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xFE,
               "vpermpd $0xFE, %%ymm6, %%ymm7",
               "vpermpd $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x30,
               "vpermpd $0x30, %%ymm6, %%ymm7",
               "vpermpd $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x21,
               "vpermpd $0x21, %%ymm6, %%ymm7",
               "vpermpd $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xD7,
               "vpermpd $0xD7, %%ymm6, %%ymm7",
               "vpermpd $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xB5,
               "vpermpd $0xB5, %%ymm6, %%ymm7",
               "vpermpd $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x85,
               "vpermpd $0x85, %%ymm6, %%ymm7",
               "vpermpd $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x29,
               "vpermpd $0x29, %%ymm6, %%ymm7",
               "vpermpd $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPBLENDD_128_0x00,
               "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x02,
               "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x04,
               "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x06,
               "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x08,
               "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0A,
               "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0C,
               "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0E,
               "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_256_0x00,
               "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xFE,
               "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x30,
               "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x21,
               "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xD7,
               "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xB5,
               "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x85,
               "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x29,
               "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSLLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsllvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsllvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsllvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsllvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSLLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsllvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsllvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsllvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsllvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsrlvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsrlvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsrlvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsrlvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRAVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsravd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsravd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRAVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsravd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsravd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBROADCASTB_128,
               "vpbroadcastb %%xmm9, %%xmm7",
               "vpbroadcastb (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTB_256,
               "vpbroadcastb %%xmm9, %%ymm7",
               "vpbroadcastb (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTW_128,
               "vpbroadcastw %%xmm9, %%xmm7",
               "vpbroadcastw (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTW_256,
               "vpbroadcastw %%xmm9, %%ymm7",
               "vpbroadcastw (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTD_128,
               "vpbroadcastd %%xmm9, %%xmm7",
               "vpbroadcastd (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTD_256,
               "vpbroadcastd %%xmm9, %%ymm7",
               "vpbroadcastd (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTQ_128,
               "vpbroadcastq %%xmm9, %%xmm7",
               "vpbroadcastq (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTQ_256,
               "vpbroadcastq %%xmm9, %%ymm7",
               "vpbroadcastq (%%rax), %%ymm7")

GEN_test_Monly(VPMASKMOVD_128_LoadForm,
               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVD_256_LoadForm,
               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVD_128_StoreForm,
               "vpmaskmovd %%xmm8, %%xmm7, (%%rax);"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVD_256_StoreForm,
               "vpmaskmovd %%ymm8, %%ymm7, (%%rax);"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_128_StoreForm,
               "vpmaskmovq %%xmm8, %%xmm7, (%%rax);"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_256_StoreForm,
               "vpmaskmovq %%ymm8, %%ymm7, (%%rax);"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)")

GEN_test_Ronly(VGATHERDPS_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPS_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq _randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

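/* Note on the *_2 gather variants above: they fold the address of _randArray
   into the index vector (vmovq/vpbroadcastq/vpaddq) and then use a base-less
   VSIB form such as "1(,%%xmm8,1)", so both the base+index and the no-base
   encodings of the gathers get exercised. */
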
/* Comment duplicated above, for convenient reference:
   Allowed operands in test insns:
     Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14.
     Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   Imm8 etc fields are also allowed, where they make sense.
   Both forms may use ymm0 as scratch.  Mem form may also use
   ymm6 as scratch.
*/

#define N_DEFAULT_ITERS 3

// Do the specified test some number of times
#define DO_N(_iters, _testfn) \
   do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)

// Do the specified test the default number of times
#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)

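/* main() below just runs every generated test the default number of times:
   DO_D( VPOR_256 ), for example, expands to a loop that calls
   test_VPOR_256() N_DEFAULT_ITERS (= 3) times, each iteration printing a
   fresh random "before" block and the resulting "after" state. */
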
int main ( void )
{
   DO_D( VPOR_256 );
   DO_D( VPXOR_256 );
   DO_D( VPSUBB_256 );
   DO_D( VPSUBD_256 );
   DO_D( VPADDD_256 );
   DO_D( VPMOVZXWD_256 );
   DO_D( VPMOVZXBW_256 );
   DO_D( VPBLENDVB_256 );
   DO_D( VPMINSD_256 );
   DO_D( VPMAXSD_256 );
   DO_D( VPSHUFB_256 );
   DO_D( VPUNPCKLBW_256 );
   DO_D( VPUNPCKHBW_256 );
   DO_D( VPABSD_256 );
   DO_D( VPACKUSWB_256 );
   DO_D( VPMOVMSKB_256 );
   DO_D( VPAND_256 );
   DO_D( VPCMPEQB_256 );
   DO_D( VPSHUFLW_0x39_256 );
   DO_D( VPSHUFHW_0x39_256 );
   DO_D( VPMULLW_256 );
   DO_D( VPADDUSW_256 );
   DO_D( VPMULHUW_256 );
   DO_D( VPADDUSB_256 );
   DO_D( VPUNPCKLWD_256 );
   DO_D( VPUNPCKHWD_256 );
   DO_D( VPSLLD_0x05_256 );
   DO_D( VPSRLD_0x05_256 );
   DO_D( VPSRAD_0x05_256 );
   DO_D( VPSUBUSB_256 );
   DO_D( VPSUBSB_256 );
   DO_D( VPSRLDQ_0x05_256 );
   DO_D( VPSLLDQ_0x05_256 );
   DO_D( VPANDN_256 );
   DO_D( VPUNPCKLQDQ_256 );
   DO_D( VPSRLW_0x05_256 );
   DO_D( VPSLLW_0x05_256 );
   DO_D( VPADDW_256 );
   DO_D( VPACKSSDW_256 );
   DO_D( VPUNPCKLDQ_256 );
   DO_D( VPCMPEQD_256 );
   DO_D( VPSHUFD_0x39_256 );
   DO_D( VPADDQ_256 );
   DO_D( VPSUBQ_256 );
   DO_D( VPSUBW_256 );
   DO_D( VPCMPEQQ_256 );
   DO_D( VPCMPGTQ_256 );
   DO_D( VPSRLQ_0x05_256 );
   DO_D( VPMULUDQ_256 );
   DO_D( VPMULDQ_256 );
   DO_D( VPSLLQ_0x05_256 );
   DO_D( VPMAXUD_256 );
   DO_D( VPMINUD_256 );
   DO_D( VPMULLD_256 );
   DO_D( VPMAXUW_256 );
   DO_D( VPMINUW_256 );
   DO_D( VPMAXSW_256 );
   DO_D( VPMINSW_256 );
   DO_D( VPMAXUB_256 );
   DO_D( VPMINUB_256 );
   DO_D( VPMAXSB_256 );
   DO_D( VPMINSB_256 );
   DO_D( VPMOVSXBW_256 );
   DO_D( VPSUBUSW_256 );
   DO_D( VPSUBSW_256 );
   DO_D( VPCMPEQW_256 );
   DO_D( VPADDB_256 );
   DO_D( VPUNPCKHDQ_256 );
   DO_D( VPMOVSXDQ_256 );
   DO_D( VPMOVSXWD_256 );
   DO_D( VPMULHW_256 );
   DO_D( VPUNPCKHQDQ_256 );
   DO_D( VPSRAW_0x05_256 );
   DO_D( VPCMPGTB_256 );
   DO_D( VPCMPGTW_256 );
   DO_D( VPCMPGTD_256 );
   DO_D( VPMOVZXBD_256 );
   DO_D( VPMOVSXBD_256 );
   DO_D( VPALIGNR_256_1of3 );
   DO_D( VPALIGNR_256_2of3 );
   DO_D( VPALIGNR_256_3of3 );
   DO_D( VPBLENDW_256_0x00 );
   DO_D( VPBLENDW_256_0xFE );
   DO_D( VPBLENDW_256_0x30 );
   DO_D( VPBLENDW_256_0x21 );
   DO_D( VPBLENDW_256_0xD7 );
   DO_D( VPBLENDW_256_0xB5 );
   DO_D( VPBLENDW_256_0x85 );
   DO_D( VPBLENDW_256_0x29 );
   DO_D( VPSLLW_256 );
   DO_D( VPSRLW_256 );
   DO_D( VPSRAW_256 );
   DO_D( VPSLLD_256 );
   DO_D( VPSRLD_256 );
   DO_D( VPSRAD_256 );
   DO_D( VPSLLQ_256 );
   DO_D( VPSRLQ_256 );
   DO_D( VPMADDWD_256 );
   DO_D( VMOVNTDQA_256 );
   DO_D( VPACKSSWB_256 );
   DO_D( VPAVGB_256 );
   DO_D( VPAVGW_256 );
   DO_D( VPADDSB_256 );
   DO_D( VPADDSW_256 );
   DO_D( VPHADDW_256 );
   DO_D( VPHADDD_256 );
   DO_D( VPHADDSW_256 );
   DO_D( VPMADDUBSW_256 );
   DO_D( VPHSUBW_256 );
   DO_D( VPHSUBD_256 );
   DO_D( VPHSUBSW_256 );
   DO_D( VPABSB_256 );
   DO_D( VPABSW_256 );
   DO_D( VPMOVSXBQ_256 );
   DO_D( VPMOVSXWQ_256 );
   DO_D( VPACKUSDW_256 );
   DO_D( VPMOVZXBQ_256 );
   DO_D( VPMOVZXWQ_256 );
   DO_D( VPMOVZXDQ_256 );
   DO_D( VMPSADBW_256_0x0 );
   DO_D( VMPSADBW_256_0x39 );
   DO_D( VMPSADBW_256_0x32 );
   DO_D( VMPSADBW_256_0x2b );
   DO_D( VMPSADBW_256_0x24 );
   DO_D( VMPSADBW_256_0x1d );
   DO_D( VMPSADBW_256_0x16 );
   DO_D( VMPSADBW_256_0x0f );
   DO_D( VPSADBW_256 );
   DO_D( VPSIGNB_256 );
   DO_D( VPSIGNW_256 );
   DO_D( VPSIGND_256 );
   DO_D( VPMULHRSW_256 );
   DO_D( VBROADCASTI128 );
   DO_D( VEXTRACTI128_0x0 );
   DO_D( VEXTRACTI128_0x1 );
   DO_D( VINSERTI128_0x0 );
   DO_D( VINSERTI128_0x1 );
   DO_D( VPERM2I128_0x00 );
   DO_D( VPERM2I128_0xFF );
   DO_D( VPERM2I128_0x30 );
   DO_D( VPERM2I128_0x21 );
   DO_D( VPERM2I128_0x12 );
   DO_D( VPERM2I128_0x03 );
   DO_D( VPERM2I128_0x85 );
   DO_D( VPERM2I128_0x5A );
   DO_D( VBROADCASTSS_128 );
   DO_D( VBROADCASTSS_256 );
   DO_D( VBROADCASTSD_256 );
   DO_D( VPERMD );
   DO_D( VPERMQ_0x00 );
   DO_D( VPERMQ_0xFE );
   DO_D( VPERMQ_0x30 );
   DO_D( VPERMQ_0x21 );
   DO_D( VPERMQ_0xD7 );
   DO_D( VPERMQ_0xB5 );
   DO_D( VPERMQ_0x85 );
   DO_D( VPERMQ_0x29 );
   DO_D( VPERMPS );
   DO_D( VPERMPD_0x00 );
   DO_D( VPERMPD_0xFE );
   DO_D( VPERMPD_0x30 );
   DO_D( VPERMPD_0x21 );
   DO_D( VPERMPD_0xD7 );
   DO_D( VPERMPD_0xB5 );
   DO_D( VPERMPD_0x85 );
   DO_D( VPERMPD_0x29 );
   DO_D( VPBLENDD_128_0x00 );
   DO_D( VPBLENDD_128_0x02 );
   DO_D( VPBLENDD_128_0x04 );
   DO_D( VPBLENDD_128_0x06 );
   DO_D( VPBLENDD_128_0x08 );
   DO_D( VPBLENDD_128_0x0A );
   DO_D( VPBLENDD_128_0x0C );
   DO_D( VPBLENDD_128_0x0E );
   DO_D( VPBLENDD_256_0x00 );
   DO_D( VPBLENDD_256_0xFE );
   DO_D( VPBLENDD_256_0x30 );
   DO_D( VPBLENDD_256_0x21 );
   DO_D( VPBLENDD_256_0xD7 );
   DO_D( VPBLENDD_256_0xB5 );
   DO_D( VPBLENDD_256_0x85 );
   DO_D( VPBLENDD_256_0x29 );
   DO_D( VPSLLVD_128 );
   DO_D( VPSLLVD_256 );
   DO_D( VPSLLVQ_128 );
   DO_D( VPSLLVQ_256 );
   DO_D( VPSRLVD_128 );
   DO_D( VPSRLVD_256 );
   DO_D( VPSRLVQ_128 );
   DO_D( VPSRLVQ_256 );
   DO_D( VPSRAVD_128 );
   DO_D( VPSRAVD_256 );
   DO_D( VPBROADCASTB_128 );
   DO_D( VPBROADCASTB_256 );
   DO_D( VPBROADCASTW_128 );
   DO_D( VPBROADCASTW_256 );
   DO_D( VPBROADCASTD_128 );
   DO_D( VPBROADCASTD_256 );
   DO_D( VPBROADCASTQ_128 );
   DO_D( VPBROADCASTQ_256 );
   DO_D( VPMASKMOVD_128_LoadForm );
   DO_D( VPMASKMOVD_256_LoadForm );
   DO_D( VPMASKMOVQ_128_LoadForm );
   DO_D( VPMASKMOVQ_256_LoadForm );
   DO_D( VPMASKMOVD_128_StoreForm );
   DO_D( VPMASKMOVD_256_StoreForm );
   DO_D( VPMASKMOVQ_128_StoreForm );
   DO_D( VPMASKMOVQ_256_StoreForm );
#if defined(VGO_darwin)
   { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
#else
   { int i; for (i = 0; i < sizeof(_randArray); i++) _randArray[i] = randUChar(); }
#endif
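   /* The fill above gives the gather tests below real data to load: their
      index vectors are masked down to a few bits by the shift pairs in each
      test, so every gathered element should fall inside the 1027-byte
      randArray / _randArray buffer. */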
   DO_D( VGATHERDPS_128 );
   DO_D( VGATHERDPS_256 );
   DO_D( VGATHERQPS_128_1 );
   DO_D( VGATHERQPS_256_1 );
   DO_D( VGATHERQPS_128_2 );
   DO_D( VGATHERQPS_256_2 );
   DO_D( VGATHERDPD_128 );
   DO_D( VGATHERDPD_256 );
   DO_D( VGATHERQPD_128_1 );
   DO_D( VGATHERQPD_256_1 );
   DO_D( VGATHERQPD_128_2 );
   DO_D( VGATHERQPD_256_2 );
   DO_D( VPGATHERDD_128 );
   DO_D( VPGATHERDD_256 );
   DO_D( VPGATHERQD_128_1 );
   DO_D( VPGATHERQD_256_1 );
   DO_D( VPGATHERQD_128_2 );
   DO_D( VPGATHERQD_256_2 );
   DO_D( VPGATHERDQ_128 );
   DO_D( VPGATHERDQ_256 );
   DO_D( VPGATHERQQ_128_1 );
   DO_D( VPGATHERQQ_256_1 );
   DO_D( VPGATHERQQ_128_2 );
   DO_D( VPGATHERQQ_256_2 );
   return 0;
}