#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/asm.h"
#include "tests/malloc.h"
#include <string.h>

#define XSAVE_AREA_SIZE 832
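
/* The 832 bytes presumably cover the 512-byte FXSAVE-format legacy
   region, the 64-byte XSAVE header at offset 512, and 256 bytes for the
   AVX (YMM_Hi128) component at offset 576, which is where CPUID leaf 0xD
   places it on typical AVX-capable CPUs. */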

typedef unsigned char           UChar;
typedef unsigned int            UInt;
typedef unsigned long long int  ULong;
typedef unsigned long int       UWord;

typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };

/* A version of memset that doesn't use XMM or YMM registers, so that
   filling buffers doesn't perturb the vector state this test inspects.
   (The libc versions are typically vectorised.) */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)s)[i] = (unsigned char)(unsigned int)c;
      /* Defeat any attempt at autovectorisation */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}

/* Ditto for memcpy. */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   size_t i;
   for (i = 0; i < n; i++) {
      ((unsigned char*)dest)[i] = ((const unsigned char*)src)[i];
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}

static void* memalign_zeroed64(size_t size)
{
   char* p = memalign64(size);
   if (p && size > 0) {
      my_memset(p, 0, size);
   }
   return p;
}
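
/* Note: XSAVE and XRSTOR take their requested-feature bitmap (RFBM) in
   EDX:EAX.  The wrappers below zero RDX and put |rfbm| in RAX, so only
   bits 0..2 (x87, SSE, AVX) can ever be requested, matching the
   assert(rfbm <= 7). */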

__attribute__((noinline))
static void do_xsave ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "memory", "rax", "rdx"
   );
}

__attribute__((noinline))
static void do_xrstor ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
   );
}

/* Set up the FP, SSE and AVX state, and then dump it. */
static void do_setup_then_xsave ( void* p, UInt rfbm )
{
   /* Put recognisable values in the x87 register stack. */
   __asm__ __volatile__("finit");
   __asm__ __volatile__("fldpi");
   __asm__ __volatile__("fld1");
   __asm__ __volatile__("fldln2");
   __asm__ __volatile__("fldlg2");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld %st(3)");
   __asm__ __volatile__("fld1");
   /* Fill ymm0..ymm15 with a repeating pattern of vec0, vec1 and zeroes. */
   __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
   __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
   __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2");
   __asm__ __volatile__("vmovaps %ymm0, %ymm3");
   __asm__ __volatile__("vmovaps %ymm1, %ymm4");
   __asm__ __volatile__("vmovaps %ymm2, %ymm5");
   __asm__ __volatile__("vmovaps %ymm0, %ymm6");
   __asm__ __volatile__("vmovaps %ymm1, %ymm7");
   __asm__ __volatile__("vmovaps %ymm1, %ymm8");
   __asm__ __volatile__("vmovaps %ymm2, %ymm9");
   __asm__ __volatile__("vmovaps %ymm0, %ymm10");
   __asm__ __volatile__("vmovaps %ymm1, %ymm11");
   __asm__ __volatile__("vmovaps %ymm1, %ymm12");
   __asm__ __volatile__("vmovaps %ymm2, %ymm13");
   __asm__ __volatile__("vmovaps %ymm0, %ymm14");
   __asm__ __volatile__("vmovaps %ymm1, %ymm15");
   do_xsave(p, rfbm);
}
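
/* In the dumps, with a full RFBM, the interesting offsets are: the x87
   control/status/tag words in bytes 0..31, the ST registers at 32+16*i,
   MXCSR at 24, the XMM registers at 160+16*i, the XSAVE header (XSTATE_BV
   first) at 512, and the YMM high halves at 576+16*i (assuming the usual
   CPUID-reported AVX component offset of 576). */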

/* Return 1 for the byte offsets which hold the least-significant 16 bits
   of each x87 register image: the registers are stored 16 bytes apart,
   starting at offset 32, in the FXSAVE-format legacy area. */
static int isFPLsbs ( int i )
{
   int q;
   q = 32;  if (i == q || i == q+1) return 1;
   q = 48;  if (i == q || i == q+1) return 1;
   q = 64;  if (i == q || i == q+1) return 1;
   q = 80;  if (i == q || i == q+1) return 1;
   q = 96;  if (i == q || i == q+1) return 1;
   q = 112; if (i == q || i == q+1) return 1;
   q = 128; if (i == q || i == q+1) return 1;
   q = 144; if (i == q || i == q+1) return 1;
   return 0;
}

/* Print the first XSAVE_AREA_SIZE bytes of |buf|, 16 per line, optionally
   masking the low 16 bits of each x87 register image. */
static void show ( unsigned char* buf, Bool hideBits64to79 )
{
   int i;
   for (i = 0; i < XSAVE_AREA_SIZE; i++) {
      if ((i % 16) == 0)
         fprintf(stderr, "%3d ", i);
      if (hideBits64to79 && isFPLsbs(i))
         fprintf(stderr, "xx ");
      else
         fprintf(stderr, "%02x ", buf[i]);
      if (i > 0 && ((i % 16) == 15))
         fprintf(stderr, "\n");
   }
}

static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a,b,c,d;
   asm volatile ("cpuid"
                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                 : "0" (index), "2" (ecx_in) );
   *eax = a; *ebx = b; *ecx = c; *edx = d;
   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
   //        index,ecx_in, a,b,c,d );
}

static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a,d;
   asm volatile ("xgetbv"
                 : "=a" (a), "=d" (d)
                 : "c" (ecx_in) );
   *eax = a; *edx = d;
}
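
/* XGETBV with ECX = 0 reads XCR0, whose low bits indicate which state
   components the OS has enabled: bit 0 = x87, bit 1 = SSE (XMM),
   bit 2 = AVX (upper YMM halves). */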

static void check_for_xsave ( void )
{
   UInt eax, ebx, ecx, edx;
   Bool ok = True;

   /* CPUID.1:ECX bit 26 -- the CPU supports XSAVE/XRSTOR. */
   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1);
   ok = ok && (((ecx >> 26) & 1) == 1);

   /* CPUID.1:ECX bit 27 -- the OS has enabled XSAVE (OSXSAVE). */
   eax = ebx = ecx = edx = 0;
   cpuid(&eax, &ebx, &ecx, &edx, 1,0);
   //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
   ok = ok && (((ecx >> 27) & 1) == 1);

   /* Require XCR0 to be exactly x87 | SSE | AVX, so the dumped image has
      the layout this test expects. */
   eax = ebx = ecx = edx = 0;
   xgetbv(&eax, &edx, 0);
   //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
   ok = ok && (edx == 0) && (eax == 7);

   if (ok) return;

   fprintf(stderr,
           "This program must be run on a CPU that supports AVX and XSAVE.\n");
   exit(1);
}
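
/* Reminder of the XSAVE semantics this test relies on: only the components
   whose RFBM bits are set are stored, and the corresponding XSTATE_BV bits
   in the header at offset 512 are updated.  Areas for unrequested
   components are (mostly) left untouched, so the 0xAA pre-fill below should
   still show through for them in the dumps. */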

void test_xsave ( Bool hideBits64to79 )
{
   /* Testing XSAVE:

      For RFBM in 0 .. 7 (that is, all combinations): set the x87, SSE
      and AVX registers with some values, do XSAVE to dump it, and
      print the resulting buffer. */

   UInt rfbm;
   for (rfbm = 0; rfbm <= 7; rfbm++) {
      UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);

      my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
      saved_img[512] = 0;
      do_setup_then_xsave(saved_img, rfbm);

      fprintf(stderr,
              "------------------ XSAVE, rfbm = %u ------------------\n", rfbm);
      show(saved_img, hideBits64to79);
      fprintf(stderr, "\n");

      free(saved_img);
   }
}

void test_xrstor ( Bool hideBits64to79 )
{
   /* Testing XRSTOR is more complex than testing XSAVE, because the
      loaded value(s) depend not only on what bits are requested (by
      RFBM) but also on what bits are actually present in the image
      (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
      combinations.

      The approach is to fill a memory buffer with data, do XRSTOR
      from the buffer, then dump all components with XSAVE into a new
      buffer, and print the result.  This is complicated by the fact
      that we need to be able to see which parts of the state (in
      registers) are neither overwritten nor zeroed by the restore.
      Hence the registers must be pre-filled with values which are
      neither zero nor the data to be loaded.  We choose to use 0x55
      where possible. */
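
   /* For reference, XRSTOR's per-component behaviour, which is what the
      8 x 8 loop below exercises:
        RFBM bit = 0                    -> that state is left unchanged
        RFBM bit = 1, XSTATE_BV bit = 1 -> state is loaded from the image
        RFBM bit = 1, XSTATE_BV bit = 0 -> state is set to its initial
                                           configuration (mostly zeroes)
      The 0x55 pre-fill makes the "left unchanged" case visible in the
      dumps. */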

   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
   /* Set MXCSR so that the insn doesn't fault */
   fives[24] = 0x80;
   fives[25] = 0x1f;
   fives[26] = 0;
   fives[27] = 0;
   /* Ditto for the XSAVE header area.  Also set XSTATE_BV. */
   fives[512] = 7;
   UInt i;
   for (i = 1; i <= 23; i++) fives[512+i] = 0;
   /* Fill the x87 register values with something that VEX's
      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
      representable also as 64 bit: 123456789.0123 */
   for (i = 0; i <= 7; i++) {
      UChar* p = &fives[32 + 16 * i];
      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
   }
   /* And mark the tags for all 8 dumped regs as "valid". */
   fives[4/*FTW*/] = 0xFF;

   /* (1) (see comment in loop below) */
   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
   do_setup_then_xsave(standard_test_data, 7);

   UInt xstate_bv, rfbm;
   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
      for (rfbm = 0; rfbm <= 7; rfbm++) {
      //{ xstate_bv = 7;
      //  { rfbm = 6;
         /* 1. Copy the "standard test data" into registers, and dump
               it with XSAVE.  This gives us an image we can try
               restoring from.

            2. Set the register state to all-0x55s (as far as is
               possible), so we can see which parts get overwritten
               and which parts get zeroed on the test restore.

            3. Do the restore from the image prepared in (1).

            4. Dump the state with XSAVE and print it.
         */

         /* (3a).  We can't use |standard_test_data| directly, since we
            need to put in the required |xstate_bv| value.  So make a
            copy and modify that instead. */
         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE);
         img_to_restore_from[512] = xstate_bv;

         /* (4a) */
         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
         saved_img[512] = 0;

         /* (2) */
         do_xrstor(fives, 7);

         // X87, SSE, AVX state LIVE

         /* (3b) */
         /* and this is what we're actually trying to test */
         do_xrstor(img_to_restore_from, rfbm);

         // X87, SSE, AVX state LIVE

         /* (4b) */
         do_xsave(saved_img, 7);

         fprintf(stderr,
                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
                 xstate_bv, rfbm);
         show(saved_img, hideBits64to79);
         fprintf(stderr, "\n");

         free(saved_img);
         free(img_to_restore_from);
      }
   }
}

int main ( int argc, char** argv )
{
   Bool hideBits64to79 = argc > 1;
   fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
                   " 16 bits of 80-bit FP numbers\n");

   check_for_xsave();

   if (1)
      test_xsave(hideBits64to79);

   if (1)
      test_xrstor(hideBits64to79);

   return 0;
}