mirror of
https://github.com/ioacademy-jikim/debugging
synced 2025-06-10 09:26:15 +00:00
1276 lines
44 KiB
C
1276 lines
44 KiB
C
|
|
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */
|
|
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
|
|
/* Short scalar aliases used throughout this test. */
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned char          UChar;
typedef unsigned short         UShort;
typedef unsigned long long int ULong;

/* Minimal boolean type and its two values. */
typedef UChar Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit vector value, viewable as 16 bytes, 8 half-words or
   4 words.  uInt and w32 are two names for the same 32-bit view. */
typedef
   union {
      UChar  uChar[16];
      UShort uShort[8];
      UInt   uInt[4];
      UInt   w32[4];
   }
   V128;
|
|
|
|
/* Bit positions of the O, S, Z, A, C and P condition codes, as they
   appear in the flags word captured with pushfq by the h_pcmpistri_*
   functions. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

/* Single-bit masks for the same flags.  Their union is 0x8D5. */
#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)
|
|
|
|
|
|
/* Count the leading zero bits of the 32-bit value x.
   Returns 32 when x is zero, otherwise a value in 0 .. 31.

   Uses only unsigned arithmetic: each step checks whether the top
   16/8/4/2/1 bits are all clear and, if so, counts them and shifts
   the remaining bits up to the top.  This avoids relying on how the
   compiler converts out-of-range values to signed int or shifts
   negative values right. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
|
|
|
|
/* Count the trailing zero bits of x, via clz32.
   Returns 32 when x is zero. */
UInt ctz32 ( UInt x )
{
   /* Ones in exactly the bit positions below the lowest set bit of x
      (all 32 positions when x is zero). */
   UInt below_lowest = (~x) & (x - 1);

   return 32 - clz32(below_lowest);
}
|
|
|
|
/* Build a 128-bit test vector from a 16-character hex summary.

   Each hex digit d (either case) is widened to the byte 0xdd.  The
   summary is read right to left: its last character becomes byte 0
   of *dst and its first character becomes byte 15.

   Asserts that summary is exactly 16 characters long and contains
   only hex digits. */
void expand ( V128* dst, char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar ch     = summary[15-i];
      UChar nibble = 0;
      if      (ch >= '0' && ch <= '9') { nibble = ch - '0'; }
      else if (ch >= 'A' && ch <= 'F') { nibble = ch - 'A' + 10; }
      else if (ch >= 'a' && ch <= 'f') { nibble = ch - 'a' + 10; }
      else assert(0);

      assert(nibble < 16);
      /* Replicate the nibble into both halves of the byte. */
      dst->uChar[i] = (UChar)((nibble << 4) | nibble);
   }
}
|
|
|
|
void try_istri ( char* which,
|
|
UInt(*h_fn)(V128*,V128*),
|
|
UInt(*s_fn)(V128*,V128*),
|
|
char* summL, char* summR )
|
|
{
|
|
assert(strlen(which) == 2);
|
|
V128 argL, argR;
|
|
expand(&argL, summL);
|
|
expand(&argR, summR);
|
|
UInt h_res = h_fn(&argL, &argR);
|
|
UInt s_res = s_fn(&argL, &argR);
|
|
printf("istri %s %s %s -> %08x %08x %s\n",
|
|
which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
|
|
}
|
|
|
|
/* Return an 8-bit mask in which bit i is set exactly when 16-bit
   element i of *arg is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   UInt lane;

   for (lane = 0; lane < 8; lane++) {
      if (arg->uShort[lane] == 0)
         mask |= 1u << lane;
   }
   return mask;
}
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// GENERAL //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
|
|
/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
|
|
basically), generate an I- or M-format output value, also the new
|
|
OSZACP flags. */
|
|
static
|
|
void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV,
|
|
/*OUT*/UInt* resOSZACP,
|
|
UInt intRes1,
|
|
UInt zmaskL, UInt zmaskR,
|
|
UInt validL,
|
|
UInt pol, UInt idx )
|
|
{
|
|
assert((pol >> 2) == 0);
|
|
assert((idx >> 1) == 0);
|
|
|
|
UInt intRes2 = 0;
|
|
switch (pol) {
|
|
case 0: intRes2 = intRes1; break; // pol +
|
|
case 1: intRes2 = ~intRes1; break; // pol -
|
|
case 2: intRes2 = intRes1; break; // pol m+
|
|
case 3: intRes2 = intRes1 ^ validL; break; // pol m-
|
|
}
|
|
intRes2 &= 0xFF;
|
|
|
|
// generate I-format output (an index in ECX)
|
|
// generate ecx value
|
|
UInt newECX = 0;
|
|
if (idx) {
|
|
// index of ms-1-bit
|
|
newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
|
|
} else {
|
|
// index of ls-1-bit
|
|
newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
|
|
}
|
|
|
|
resV->w32[0] = newECX;
|
|
resV->w32[1] = 0;
|
|
resV->w32[2] = 0;
|
|
resV->w32[3] = 0;
|
|
|
|
// generate new flags, common to all ISTRI and ISTRM cases
|
|
*resOSZACP // A, P are zero
|
|
= ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
|
|
| ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
|
|
| ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
|
|
| ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
|
|
}
|
|
|
|
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
|
|
variants on 16-bit characters.
|
|
|
|
For xSTRI variants, the new ECX value is placed in the 32 bits
|
|
pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
|
|
variants, the result is a 128 bit value and is placed at *resV in
|
|
the obvious way.
|
|
|
|
For all variants, the new OSZACP value is placed at *resOSZACP.
|
|
|
|
argLV and argRV are the vector args. The caller must prepare a
|
|
8-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
|
|
must be 1 for each zero byte of of the respective arg. For ESTRx
|
|
variants this is derived from the explicit length indication, and
|
|
must be 0 in all places except at the bit index corresponding to
|
|
the valid length (0 .. 8). If the valid length is 8 then the
|
|
mask must be all zeroes. In all cases, bits 31:8 must be zero.
|
|
|
|
imm8 is the original immediate from the instruction. isSTRM
|
|
indicates whether this is a xSTRM or xSTRI variant, which controls
|
|
how much of *res is written.
|
|
|
|
If the given imm8 case can be handled, the return value is True.
|
|
If not, False is returned, and neither *res not *resOSZACP are
|
|
altered.
|
|
*/
|
|
|
|
Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
|
|
/*OUT*/UInt* resOSZACP,
|
|
V128* argLV, V128* argRV,
|
|
UInt zmaskL, UInt zmaskR,
|
|
UInt imm8, Bool isxSTRM )
|
|
{
|
|
assert(imm8 < 0x80);
|
|
assert((zmaskL >> 8) == 0);
|
|
assert((zmaskR >> 8) == 0);
|
|
|
|
/* Explicitly reject any imm8 values that haven't been validated,
|
|
even if they would probably work. Life is too short to have
|
|
unvalidated cases in the code base. */
|
|
switch (imm8) {
|
|
case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
|
|
case 0x13: case 0x1B:
|
|
case 0x39: case 0x3B:
|
|
case 0x45: case 0x4B:
|
|
break;
|
|
default:
|
|
return False;
|
|
}
|
|
|
|
UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
|
|
UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
|
|
UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
|
|
UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
|
|
|
|
/*----------------------------------------*/
|
|
/*-- strcmp on wide data --*/
|
|
/*----------------------------------------*/
|
|
|
|
if (agg == 2/*equal each, aka strcmp*/
|
|
&& (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
|
|
Int i;
|
|
UShort* argL = (UShort*)argLV;
|
|
UShort* argR = (UShort*)argRV;
|
|
UInt boolResII = 0;
|
|
for (i = 7; i >= 0; i--) {
|
|
UShort cL = argL[i];
|
|
UShort cR = argR[i];
|
|
boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
|
|
}
|
|
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
|
|
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
|
|
|
|
// do invalidation, common to all equal-each cases
|
|
UInt intRes1
|
|
= (boolResII & validL & validR) // if both valid, use cmpres
|
|
| (~ (validL | validR)); // if both invalid, force 1
|
|
// else force 0
|
|
intRes1 &= 0xFF;
|
|
|
|
// generate I-format output
|
|
PCMPxSTRx_WRK_gen_output_fmt_I_wide(
|
|
resV, resOSZACP,
|
|
intRes1, zmaskL, zmaskR, validL, pol, idx
|
|
);
|
|
|
|
return True;
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/*-- set membership on wide data --*/
|
|
/*----------------------------------------*/
|
|
|
|
if (agg == 0/*equal any, aka find chars in a set*/
|
|
&& (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
|
|
/* argL: the string, argR: charset */
|
|
UInt si, ci;
|
|
UShort* argL = (UShort*)argLV;
|
|
UShort* argR = (UShort*)argRV;
|
|
UInt boolRes = 0;
|
|
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
|
|
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
|
|
|
|
for (si = 0; si < 8; si++) {
|
|
if ((validL & (1 << si)) == 0)
|
|
// run off the end of the string.
|
|
break;
|
|
UInt m = 0;
|
|
for (ci = 0; ci < 8; ci++) {
|
|
if ((validR & (1 << ci)) == 0) break;
|
|
if (argR[ci] == argL[si]) { m = 1; break; }
|
|
}
|
|
boolRes |= (m << si);
|
|
}
|
|
|
|
// boolRes is "pre-invalidated"
|
|
UInt intRes1 = boolRes & 0xFF;
|
|
|
|
// generate I-format output
|
|
PCMPxSTRx_WRK_gen_output_fmt_I_wide(
|
|
resV, resOSZACP,
|
|
intRes1, zmaskL, zmaskR, validL, pol, idx
|
|
);
|
|
|
|
return True;
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/*-- substring search on wide data --*/
|
|
/*----------------------------------------*/
|
|
|
|
if (agg == 3/*equal ordered, aka substring search*/
|
|
&& (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
|
|
|
|
/* argL: haystack, argR: needle */
|
|
UInt ni, hi;
|
|
UShort* argL = (UShort*)argLV;
|
|
UShort* argR = (UShort*)argRV;
|
|
UInt boolRes = 0;
|
|
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
|
|
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
|
|
for (hi = 0; hi < 8; hi++) {
|
|
UInt m = 1;
|
|
for (ni = 0; ni < 8; ni++) {
|
|
if ((validR & (1 << ni)) == 0) break;
|
|
UInt i = ni + hi;
|
|
if (i >= 8) break;
|
|
if (argL[i] != argR[ni]) { m = 0; break; }
|
|
}
|
|
boolRes |= (m << hi);
|
|
if ((validL & (1 << hi)) == 0)
|
|
// run off the end of the haystack
|
|
break;
|
|
}
|
|
|
|
// boolRes is "pre-invalidated"
|
|
UInt intRes1 = boolRes & 0xFF;
|
|
|
|
// generate I-format output
|
|
PCMPxSTRx_WRK_gen_output_fmt_I_wide(
|
|
resV, resOSZACP,
|
|
intRes1, zmaskL, zmaskR, validL, pol, idx
|
|
);
|
|
|
|
return True;
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/*-- ranges, unsigned wide data --*/
|
|
/*----------------------------------------*/
|
|
|
|
if (agg == 1/*ranges*/
|
|
&& fmt == 1/*uw*/) {
|
|
|
|
/* argL: string, argR: range-pairs */
|
|
UInt ri, si;
|
|
UShort* argL = (UShort*)argLV;
|
|
UShort* argR = (UShort*)argRV;
|
|
UInt boolRes = 0;
|
|
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
|
|
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
|
|
for (si = 0; si < 8; si++) {
|
|
if ((validL & (1 << si)) == 0)
|
|
// run off the end of the string
|
|
break;
|
|
UInt m = 0;
|
|
for (ri = 0; ri < 8; ri += 2) {
|
|
if ((validR & (3 << ri)) != (3 << ri)) break;
|
|
if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
|
|
m = 1; break;
|
|
}
|
|
}
|
|
boolRes |= (m << si);
|
|
}
|
|
|
|
// boolRes is "pre-invalidated"
|
|
UInt intRes1 = boolRes & 0xFF;
|
|
|
|
// generate I-format output
|
|
PCMPxSTRx_WRK_gen_output_fmt_I_wide(
|
|
resV, resOSZACP,
|
|
intRes1, zmaskL, zmaskR, validL, pol, idx
|
|
);
|
|
|
|
return True;
|
|
}
|
|
|
|
return False;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_4B //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x4B".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_4B ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x4B, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x4B": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_4B returns. */
UInt s_pcmpistri_4B ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x4B, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_4B ( void )
|
|
{
|
|
char* wot = "4B";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_4B;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_4B;
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_3B //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x3B".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_3B ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x3B, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x3B": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_3B returns. */
UInt s_pcmpistri_3B ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x3B, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_3B ( void )
|
|
{
|
|
char* wot = "3B";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_3B;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_3B;
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_0D //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
__attribute__((noinline))
|
|
UInt h_pcmpistri_0D ( V128* argL, V128* argR )
|
|
{
|
|
V128 block[2];
|
|
memcpy(&block[0], argL, sizeof(V128));
|
|
memcpy(&block[1], argR, sizeof(V128));
|
|
ULong res = 0, flags = 0;
|
|
__asm__ __volatile__(
|
|
"movdqu 0(%2), %%xmm2" "\n\t"
|
|
"movdqu 16(%2), %%xmm11" "\n\t"
|
|
"pcmpistri $0x0D, %%xmm2, %%xmm11" "\n\t"
|
|
//"pcmpistrm $0x0D, %%xmm2, %%xmm11" "\n\t"
|
|
//"movd %%xmm0, %%ecx" "\n\t"
|
|
"pushfq" "\n\t"
|
|
"popq %%rdx" "\n\t"
|
|
"movq %%rcx, %0" "\n\t"
|
|
"movq %%rdx, %1" "\n\t"
|
|
: /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
|
|
: "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
|
|
);
|
|
return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
|
|
}
|
|
|
|
/* Software model for "pcmpistri $0x0D": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_0D returns. */
UInt s_pcmpistri_0D ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x0D, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_0D ( void )
|
|
{
|
|
char* wot = "0D";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_0D;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_0D;
|
|
|
|
try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
|
|
|
|
try_istri(wot,h,s, "11111111abcdef11", "00abcdef00abcdef");
|
|
|
|
try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
|
|
try_istri(wot,h,s, "1111111111abcdef", "0000000000abcdef");
|
|
try_istri(wot,h,s, "111111111111abcd", "0000000000abcdef");
|
|
|
|
try_istri(wot,h,s, "1111abcd11abcd11", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "11abcd1111abcd11", "000000000000abcd");
|
|
try_istri(wot,h,s, "abcd111111abcd11", "000000000000abcd");
|
|
try_istri(wot,h,s, "cd11111111abcd11", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "01abcd11abcd1111", "000000000000abcd");
|
|
try_istri(wot,h,s, "00abcd11abcd1111", "000000000000abcd");
|
|
try_istri(wot,h,s, "0000cd11abcd1111", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "00abcd1100abcd11", "000000000000abcd");
|
|
try_istri(wot,h,s, "00abcd110000cd11", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "1111111111111234", "0000000000000000");
|
|
try_istri(wot,h,s, "1111111111111234", "0000000000000011");
|
|
try_istri(wot,h,s, "1111111111111234", "0000000000001111");
|
|
|
|
try_istri(wot,h,s, "1111111111111234", "1111111111111234");
|
|
try_istri(wot,h,s, "0a11111111111111", "000000000000000a");
|
|
try_istri(wot,h,s, "0b11111111111111", "000000000000000a");
|
|
|
|
try_istri(wot,h,s, "b111111111111111", "0000000000000000");
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
|
|
try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
|
|
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_09 //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x09".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_09 ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x09, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x09": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_09 returns. */
UInt s_pcmpistri_09 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x09, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_09 ( void )
|
|
{
|
|
char* wot = "09";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_09;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_09;
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_1B //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x1B".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_1B ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x1B, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x1B": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_1B returns. */
UInt s_pcmpistri_1B ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x1B, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_1B ( void )
|
|
{
|
|
char* wot = "1B";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_1B;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_1B;
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_03 //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x03".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_03 ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x03, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x03": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_03 returns. */
UInt s_pcmpistri_03 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x03, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_03 ( void )
|
|
{
|
|
char* wot = "03";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_03;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_03;
|
|
|
|
try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
|
|
try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
|
|
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
|
|
try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
|
|
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
|
|
|
|
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
|
|
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
|
|
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
// //
|
|
// ISTRI_13 //
|
|
// //
|
|
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware reference for "pcmpistri $0x13".

   Loads *argL into xmm2 and *argR into xmm11, executes the
   instruction, and captures RCX and the flags register.  Returns the
   flags masked with 0x8D5 (O, S, Z, A, P, C) in bits 16 and up, and
   the low 16 bits of RCX in bits 15:0. */
UInt h_pcmpistri_13 ( V128* argL, V128* argR )
{
   ULong res, flags;
   V128  block[2];   /* [0] = left arg, [1] = right arg, contiguous */

   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));

   /* %rsp is lowered by 1024 bytes around the sequence, which is
      more than the 128-byte x86-64 red zone, so the pushfq store
      lands below it; the flags are read before the addq changes
      them again. */
   __asm__ __volatile__(
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t"
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags)
      : /*in*/  "r"(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
|
|
|
|
/* Software model for "pcmpistri $0x13": runs pcmpXstrX_WRK_wide with
   zero masks derived from the arguments and packs the result as
   (flags << 16) | index, the same layout h_pcmpistri_13 returns. */
UInt s_pcmpistri_13 ( V128* argLU, V128* argRU )
{
   V128 out;
   UInt oszacp;
   UInt zeroL = zmask_from_V128(argLU);
   UInt zeroR = zmask_from_V128(argRU);

   Bool handled
      = pcmpXstrX_WRK_wide( &out, &oszacp, argLU, argRU,
                            zeroL, zeroR,
                            0x13, False/*!isSTRM*/ );
   assert(handled);

   return (oszacp << 16) | out.uInt[0];
}
|
|
|
|
void istri_13 ( void )
|
|
{
|
|
char* wot = "13";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_13;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_13;
|
|
|
|
try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
|
|
try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
|
|
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
|
|
try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
|
|
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
|
|
|
|
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
|
|
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_45                       //
//                                                      //
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware ("h_") side of the 0x45 case.  Executes a real
   "pcmpistri $0x45" on private copies of the two operands and
   returns (selected rflags bits << 16) | (low 16 bits of rcx). */
UInt h_pcmpistri_45 ( V128* argL, V128* argR )
{
   ULong rcxOut, rflagsOut;
   V128  operands[2];

   /* Keep both operands back to back so the asm needs a single base
      register: argL sits at offset 0, argR at offset 16. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Step rsp down before the pushfq below; presumably this keeps
         the push clear of anything the compiler parked just below
         rsp (red zone) -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t"
      /* Fetch rflags through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/   "=r"(rcxOut), "=r"(rflagsOut)
      : /*in*/    "r"(&operands[0])
      : /*trash*/ "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   {
      /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 -- the positions named
         by SHIFT_C/P/A/Z/S/O near the top of this file. */
      const ULong oszacp = rflagsOut & 0x8D5;
      const ULong index  = rcxOut & 0xFFFF;
      return (oszacp << 16) | index;
   }
}
|
|
|
|
/* Software ("s_") side of the 0x45 case.  Feeds both operands and
   their zmask_from_V128() masks to pcmpXstrX_WRK_wide with control
   value 0x45 (the same immediate h_pcmpistri_45 gives the real
   instruction) and isSTRM == False, insists that the worker reports
   success, and packs the outcome as
   (OSZACP flags << 16) | (low 32 bits of the result). */
UInt s_pcmpistri_45 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool ok;

   ok = pcmpXstrX_WRK_wide(
           &outVec, &outFlags,
           argLU, argRU,
           zmask_from_V128(argLU),
           zmask_from_V128(argRU),
           0x45,
           False/*!isSTRM*/
        );
   assert(ok);

   /* Flags go in the upper half, the first 32-bit lane of the
      result vector (the ECX value) in the lower half. */
   return (outFlags << 16) | outVec.uInt[0];
}
|
|
|
|
void istri_45 ( void )
|
|
{
|
|
char* wot = "45";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_45;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_45;
|
|
|
|
try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000bbcc");
|
|
try_istri(wot,h,s, "aaaabbbbccccdddd", "000000000000ccbb");
|
|
try_istri(wot,h,s, "baaabbbbccccdddd", "000000000000ccbb");
|
|
try_istri(wot,h,s, "baaabbbbccccdddc", "000000000000ccbb");
|
|
|
|
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb");
|
|
try_istri(wot,h,s, "bbbbbbbb00bbbbbb", "000000000000ccbb");
|
|
try_istri(wot,h,s, "bbbbbbbbbbbb00bb", "000000000000ccbb");
|
|
try_istri(wot,h,s, "bbbbbbbbbbbbbb00", "000000000000ccbb");
|
|
try_istri(wot,h,s, "0000000000000000", "000000000000ccbb");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000ccbb");
|
|
try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000bb");
|
|
try_istri(wot,h,s, "bb44bb44bb44bb44", "000000006622ccbb");
|
|
|
|
try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000022ccbb");
|
|
try_istri(wot,h,s, "bb44bb44bb44bb44", "000000000000ccbb");
|
|
try_istri(wot,h,s, "bb44bb44bb44bb44", "00000000000000bb");
|
|
|
|
try_istri(wot,h,s, "0011223344556677", "0000997755442211");
|
|
try_istri(wot,h,s, "1122334455667711", "0000997755442211");
|
|
|
|
try_istri(wot,h,s, "0011223344556677", "0000aa8866553322");
|
|
try_istri(wot,h,s, "1122334455667711", "0000aa8866553322");
|
|
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_01                       //
//                                                      //
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware ("h_") side of the 0x01 case.  Executes a real
   "pcmpistri $0x01" on private copies of the two operands and
   returns (selected rflags bits << 16) | (low 16 bits of rcx). */
UInt h_pcmpistri_01 ( V128* argL, V128* argR )
{
   ULong rcxOut, rflagsOut;
   V128  operands[2];

   /* Keep both operands back to back so the asm needs a single base
      register: argL sits at offset 0, argR at offset 16. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Step rsp down before the pushfq below; presumably this keeps
         the push clear of anything the compiler parked just below
         rsp (red zone) -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t"
      /* Fetch rflags through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/   "=r"(rcxOut), "=r"(rflagsOut)
      : /*in*/    "r"(&operands[0])
      : /*trash*/ "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   {
      /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 -- the positions named
         by SHIFT_C/P/A/Z/S/O near the top of this file. */
      const ULong oszacp = rflagsOut & 0x8D5;
      const ULong index  = rcxOut & 0xFFFF;
      return (oszacp << 16) | index;
   }
}
|
|
|
|
/* Software ("s_") side of the 0x01 case.  Feeds both operands and
   their zmask_from_V128() masks to pcmpXstrX_WRK_wide with control
   value 0x01 (the same immediate h_pcmpistri_01 gives the real
   instruction) and isSTRM == False, insists that the worker reports
   success, and packs the outcome as
   (OSZACP flags << 16) | (low 32 bits of the result). */
UInt s_pcmpistri_01 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool ok;

   ok = pcmpXstrX_WRK_wide(
           &outVec, &outFlags,
           argLU, argRU,
           zmask_from_V128(argLU),
           zmask_from_V128(argRU),
           0x01,
           False/*!isSTRM*/
        );
   assert(ok);

   /* Flags go in the upper half, the first 32-bit lane of the
      result vector (the ECX value) in the lower half. */
   return (outFlags << 16) | outVec.uInt[0];
}
|
|
|
|
void istri_01 ( void )
|
|
{
|
|
char* wot = "01";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_01;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_01;
|
|
|
|
try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
|
|
try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
|
|
try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
|
|
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
|
|
try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
|
|
try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
|
|
try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
|
|
|
|
try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
|
|
|
|
try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
|
|
try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
|
|
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_39                       //
//                                                      //
//////////////////////////////////////////////////////////
|
|
|
|
/* Hardware ("h_") side of the 0x39 case.  Executes a real
   "pcmpistri $0x39" on private copies of the two operands and
   returns (selected rflags bits << 16) | (low 16 bits of rcx). */
UInt h_pcmpistri_39 ( V128* argL, V128* argR )
{
   ULong rcxOut, rflagsOut;
   V128  operands[2];

   /* Keep both operands back to back so the asm needs a single base
      register: argL sits at offset 0, argR at offset 16. */
   memcpy(&operands[0], argL, sizeof operands[0]);
   memcpy(&operands[1], argR, sizeof operands[1]);

   __asm__ __volatile__(
      /* Step rsp down before the pushfq below; presumably this keeps
         the push clear of anything the compiler parked just below
         rsp (red zone) -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t"
      /* Fetch rflags through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/   "=r"(rcxOut), "=r"(rflagsOut)
      : /*in*/    "r"(&operands[0])
      : /*trash*/ "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );

   {
      /* 0x8D5 keeps bits 0, 2, 4, 6, 7 and 11 -- the positions named
         by SHIFT_C/P/A/Z/S/O near the top of this file. */
      const ULong oszacp = rflagsOut & 0x8D5;
      const ULong index  = rcxOut & 0xFFFF;
      return (oszacp << 16) | index;
   }
}
|
|
|
|
/* Software ("s_") side of the 0x39 case.  Feeds both operands and
   their zmask_from_V128() masks to pcmpXstrX_WRK_wide with control
   value 0x39 (the same immediate h_pcmpistri_39 gives the real
   instruction) and isSTRM == False, insists that the worker reports
   success, and packs the outcome as
   (OSZACP flags << 16) | (low 32 bits of the result). */
UInt s_pcmpistri_39 ( V128* argLU, V128* argRU )
{
   V128 outVec;
   UInt outFlags;
   Bool ok;

   ok = pcmpXstrX_WRK_wide(
           &outVec, &outFlags,
           argLU, argRU,
           zmask_from_V128(argLU),
           zmask_from_V128(argRU),
           0x39,
           False/*!isSTRM*/
        );
   assert(ok);

   /* Flags go in the upper half, the first 32-bit lane of the
      result vector (the ECX value) in the lower half. */
   return (outFlags << 16) | outVec.uInt[0];
}
|
|
|
|
void istri_39 ( void )
|
|
{
|
|
char* wot = "39";
|
|
UInt(*h)(V128*,V128*) = h_pcmpistri_39;
|
|
UInt(*s)(V128*,V128*) = s_pcmpistri_39;
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "0000000000000000");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
|
|
try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
|
|
try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
|
|
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
|
|
try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
|
|
|
|
try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
|
|
try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
//                                                      //
//                         main                         //
//                                                      //
//////////////////////////////////////////////////////////
|
|
|
|
int main ( void )
|
|
{
|
|
istri_4B();
|
|
istri_3B();
|
|
istri_09();
|
|
istri_1B();
|
|
istri_03();
|
|
istri_0D();
|
|
istri_13();
|
|
istri_45();
|
|
istri_01();
|
|
istri_39();
|
|
return 0;
|
|
}
|