mirror of
https://github.com/tiagovignatti/intel-gpu-tools.git
synced 2025-06-08 08:26:10 +00:00
High-level summary of the files: * debug_rdata - get the current state from the debug registers. Helpful when developing the debugger, and could serve some purpose in the future. * eudb - the debugger itself * eviction_macro - generates the macro that flushes the EU render cache (needed until control flow is working) * pre_cpp - an evaluating, C-preprocessor-like tool, to be run before cpp * sr - the system routine, i.e. the exception handler which runs on the EU * test - a very basic test system routine * debug.h
278 lines
8.3 KiB
Plaintext
278 lines
8.3 KiB
Plaintext
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * Project headers.  This file uses STATE_EU_MSG, STATE_QWORD,
 * DEBUG_PROTOCOL_VERSION, COMMUNICATION_QWORD and EVICT_CACHE without
 * defining them; presumably they are provided by these two headers
 * (TODO confirm which header supplies which name).
 */
#include "debug.h"
#include "evict.h"

/*
 * Bit masks for the control register.  CR0_0_* masks are applied to cr0.0
 * and CR0_1_* masks to cr0.1 by the Enter routine in this file.
 */
#define CR0_0_ME_STATE_CTRL (1 << 31)
#define CR0_0_BP_SUPPRESS (1 << 15)
#define CR0_0_SPF_EN (1 << 2)
#define CR0_0_ACC_DIS (1 << 1)
#define CR0_1_BES_CTRL (1 << 31)
#define CR0_1_HALT_CTRL (1 << 30)
#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
#define CR0_1_ILLGL_OP_STS (1 << 28)
#define CR0_1_STACK_OVRFLW_STS (1 << 27)

/* OR-ed into cr0.0 as the first real instruction of the routine. */
#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
// TODO: Need to fix this for non breakpoint case
/* AND-ed into cr0.1 on the way out; clears only the BES_CTRL bit. */
#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
/*
 * AND-ed into cr0.0 as the last step of the routine; clears ME_STATE_CTRL
 * together with the two bits that CR0_0_ENTRY_UNMASK set on entry.
 */
#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)

// TODO: not sure how to make this not hardcoded
/* Size in bytes of the scratch area assumed for each thread (1 MiB). */
#define PER_THREAD_SCRATCH_SIZE (1 << 20)
/*
 * Per-thread scratch size divided by 16.  Note that despite the QWORDS in
 * the name, the shift by 4 yields a count of 16-byte units.  Used as the
 * multiplier for the thread index in the GPGPU entry path.
 */
#define PER_THREAD_QWORDS (PER_THREAD_SCRATCH_SIZE >> 4)

/* Should get this from brw_defines.h */
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0

/*
 * desc field, ie. dword3 6.3.66.2 and 2.11.2.1.4
 *
 * Field layout as encoded by the shifts below:
 *   [28:25] message length (header + payload registers)
 *   [24:20] response length
 *   [19]    header present
 *   [17]    write commit
 *   [13..]  message type
 *   [8..]   block size
 *   [7:0]   binding table index
 */
#define SEND_MLEN_5 (5<<25)
#define SEND_MLEN_3 (3<<25)
#define SEND_MLEN_2 (2<<25)
#define SEND_MLEN_1 (1<<25)
#define SEND_RLEN_1 (1<<20)
#define SEND_RLEN_0 (0<<20)
#define SEND_HEADER_PRESENT (1<<19)
#define SEND_WRITE_COMMIT (1<<17)
#define SEND_TYPE_WRITE (GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE<<13)
#define SEND_TYPE_READ (BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ<<13)
/* SIZEn = n payload registers: 2, 4 and 8 owords respectively. */
#define SEND_BLOCK_SIZE1 (BRW_DATAPORT_OWORD_BLOCK_2_OWORDS<<8)
#define SEND_BLOCK_SIZE2 (BRW_DATAPORT_OWORD_BLOCK_4_OWORDS<<8)
#define SEND_BLOCK_SIZE4 (BRW_DATAPORT_OWORD_BLOCK_8_OWORDS<<8)
#define SEND_BINDING_TABLE (255<<0)

/*
 * Complete descriptors.  The message length is always one header register
 * plus the number of payload registers implied by the block size.
 * These expand to bare `a | b | c` expressions (no outer parentheses)
 * because they are pasted directly into a send instruction operand.
 */
// No write commit
#define WRITE_DESC1_XXX SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2
/*
 * NOTE(review): WRITE_SCRATCH1_WC names g1 as the send destination, but this
 * descriptor leaves the response length at 0 -- confirm against the dataport
 * write-commit requirements before relying on it.
 */
#define WRITE_DESC1_WC SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2 | SEND_WRITE_COMMIT
#define WRITE_DESC2 SEND_BINDING_TABLE | SEND_BLOCK_SIZE2 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_3
#define WRITE_DESC4 SEND_BINDING_TABLE | SEND_BLOCK_SIZE4 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_5
#define RECV_DESC1 SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_READ | SEND_HEADER_PRESENT | SEND_MLEN_1 | SEND_RLEN_1
/* Pre-evaluated forms: 0x40902FF == WRITE_DESC1_XXX, 0x40b02FF == WRITE_DESC1_WC. */
//#define SEND_DESC1 0x40902FF
#define SEND_DESC1_WC 0x40b02FF

/*
 * ex_desc field 6.3.66.2
 *
 * SEND_EX_DESC is the extended descriptor used by every send in this file:
 * it selects the render cache data port and does not set SEND_EOT.
 */
#define SEND_DP_RENDER_CACHE (5<<0)
#define SEND_EOT (1<<5)
#define SEND_EX_DESC SEND_DP_RENDER_CACHE

/**
 * WRITE_SCRATCH1 - Write 2 owords.
 * cdst.2 - offset
 * cdst.5 - per thread scratch base, relative to gsba??
 * cdst+1 - data to be written.
 *
 * cdst is the message header register; the payload starts in the register
 * that follows it.  Variants:
 *   WRITE_SCRATCH1    - 2 owords (1 payload register), no write commit,
 *                       null destination.
 *   WRITE_SCRATCH1_WC - as WRITE_SCRATCH1 but with the write-commit bit set
 *                       and g1 named as the destination.
 *   WRITE_SCRATCH2    - 4 owords (2 payload registers).
 *   WRITE_SCRATCH4    - 8 owords (4 payload registers).
 */
#define WRITE_SCRATCH1(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC1_XXX FLAGS
#define WRITE_SCRATCH1_WC(cdst) \
	send (16) g1 cdst SEND_EX_DESC WRITE_DESC1_WC FLAGS
#define WRITE_SCRATCH2(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC2 FLAGS
#define WRITE_SCRATCH4(cdst) \
	send (16) null cdst SEND_EX_DESC WRITE_DESC4 FLAGS

/**
 * READ_SCRATCH1 - Read 2 owords.
 * cdst.2 - offset
 * cdst.5 - per thread scratch base, relative to gsba??
 * grf - register where read data is populated.
 *
 * cdst is the one-register message header; the response is a single
 * register written to grf.
 */
#define READ_SCRATCH1(grf, cdst) \
	send (16) grf:ud cdst SEND_EX_DESC RECV_DESC1 FLAGS

/**
 * SET_OFFSET - setup mrf for the given offset prior to a send instruction.
 * mrf - message register to be used as the header.
 * offset - offset.
 *
 * If a WRITE_SCRATCH follows, mrf+1 -> mrf+1+n should contain the data to be
 * written.
 *
 * Copies g0.5 into mrf.5 and writes offset into mrf.2, so g0.5 must still
 * hold a valid value when this macro is used.  All other dwords of mrf are
 * left untouched.
 */
#define SET_OFFSET(mrf, offset) \
	mov (1) mrf.5:ud g0.5:ud FLAGS; \
	mov (1) mrf.2:ud offset:ud FLAGS

/**
 * SAVE_CRF - save the control register
 * clobbers: m0.2, m0.5, m1
 *
 * Writes one register to scratch offset CR_OFFSET with this layout:
 *   dword 0 - cr0.0
 *   dword 1 - cr0.1
 *   dword 2 - cr0.2
 *   dword 3 - sr0
 *   dword 4..7 - 0xdeadbeef filler
 */
#define CR_OFFSET 0x40
#define SAVE_CRF \
	SET_OFFSET(m0, CR_OFFSET); \
	mov (8) m1:ud 0xdeadbeef:ud FLAGS; \
	mov (1) m1.0:ud cr0.0 FLAGS; \
	mov (1) m1.1:ud cr0.1 FLAGS; \
	mov (1) m1.2:ud cr0.2 FLAGS; \
	mov (1) m1.3:ud sr0:ud FLAGS; \
	WRITE_SCRATCH1(m0)

/*
 * STORE_GRF - write general register grf to scratch at offset.
 * Uses m0 as the message header and stages the data in m1.
 * clobbers: m0.2, m0.5, m1
 */
#define STORE_GRF(grf, offset) \
	SET_OFFSET(m0, offset); \
	mov (8) m1:ud grf:ud FLAGS; \
	WRITE_SCRATCH1(m0)

/*
 * LOAD_GRF - read one register from scratch at offset into grf.
 * Uses m0 as the message header.
 * clobbers: m0.2, m0.5
 */
#define LOAD_GRF(grf, offset) \
	SET_OFFSET(m0, offset); \
	READ_SCRATCH1(grf, m0)

/*
 * STORE_MRF - write a message register to scratch at offset.
 *
 * Quirk: mrf itself is used as the message header, so the register whose
 * contents end up in scratch is mrf+1, not mrf (see WRITE_SCRATCH1).
 * clobbers: mrf.2 mrf.5
 */
#define STORE_MRF(mrf, offset) \
	SET_OFFSET(mrf, offset); \
	WRITE_SCRATCH1(mrf)

/*
 * LOAD_MRF - read one register from scratch at offset into mrf.
 * non-quirky semantics, unlike STORE_MRF: the register named is the one
 * that receives the data.  The value is read into g1 first and then copied.
 * clobbers: g1 (and m0.2, m0.5 through LOAD_GRF)
 */
#define LOAD_MRF(mrf, offset) \
	LOAD_GRF(g1, offset); \
	mov (8) mrf:ud g1:ud FLAGS

/*
 * SAVE_ALL_MRF - save m2..m15 to scratch offsets 0x2..0x1c.
 *
 * Because STORE_MRF(mN, off) uses mN as the header and writes mN+1, each
 * line below stores the register AFTER the one it names.  The stores must
 * stay in ascending order: a register only has its dwords 2 and 5
 * overwritten (as a header) after its contents were written out by the
 * previous store.  m1 is expected to have been stored at offset 0 by a
 * preceding STORE_MRF(m0, 0).
 */
#define SAVE_ALL_MRF \
	/* m1 is saved already */ \
	STORE_MRF(m1, 0x2); \
	STORE_MRF(m2, 0x4); \
	STORE_MRF(m3, 0x6); \
	STORE_MRF(m4, 0x8); \
	STORE_MRF(m5, 0xa); \
	STORE_MRF(m6, 0xc); \
	STORE_MRF(m7, 0xe); \
	STORE_MRF(m8, 0x10); \
	STORE_MRF(m9, 0x12); \
	STORE_MRF(m10, 0x14); \
	STORE_MRF(m11, 0x16); \
	STORE_MRF(m12, 0x18); \
	STORE_MRF(m13, 0x1a); \
	STORE_MRF(m14, 0x1c)

/*
 * RESTORE_ALL_MRF - reload m15..m1 from scratch offsets 0x1c..0x0.
 *
 * This is the inverse of STORE_MRF(m0, 0) followed by SAVE_ALL_MRF: since
 * STORE_MRF(mN, off) stores mN+1, offset 0x0 holds m1, 0x2 holds m2, ...,
 * and 0x1c holds m15.  LOAD_MRF has no such shift, hence the apparent
 * one-register difference between the two tables.
 * clobbers: g1, m0.2, m0.5 (through LOAD_MRF)
 */
#define RESTORE_ALL_MRF \
	LOAD_MRF(m15, 0x1c); \
	LOAD_MRF(m14, 0x1a); \
	LOAD_MRF(m13, 0x18); \
	LOAD_MRF(m12, 0x16); \
	LOAD_MRF(m11, 0x14); \
	LOAD_MRF(m10, 0x12); \
	LOAD_MRF(m9, 0x10); \
	LOAD_MRF(m8, 0xe); \
	LOAD_MRF(m7, 0xc); \
	LOAD_MRF(m6, 0xa); \
	LOAD_MRF(m5, 0x8); \
	LOAD_MRF(m4, 0x6); \
	LOAD_MRF(m3, 0x4); \
	LOAD_MRF(m2, 0x2); \
	LOAD_MRF(m1, 0x0)

/* Refuse to build unless the SANDYBRIDGE macro is defined. */
#ifndef SANDYBRIDGE
#error Only SandyBridge is supported
#endif

/*
 * Default flags for an instruction.
 * Appended to every instruction in this file, including those inside the
 * macros defined above; that works because the preprocessor expands FLAGS
 * where a macro is used, not where it is defined.
 */
#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}

/*
 * Enter - the system routine body.
 *
 * We can clobber m0, and g0.4, everything else must be saved.
 *
 * Sequence:
 *   1. set the entry bits in cr0.0
 *   2. (GPGPU only) derive a per-thread scratch offset from sr0.0
 *   3. save m1, g0, g1, m2..m15, a state word, the protocol version and the
 *      control registers to scratch
 *   4. evict the cache, wait on n1, evict again
 *   5. restore the message registers, g1 and g0
 *   6. fix up cr0.1 / cr0.0
 */
Enter:
	nop;

	/* Set SPF_EN and ACC_DIS in cr0.0 while the routine runs. */
	or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;

/*
 * g0.5 has the per thread scratch space when running in FS or VS.
 * If we don't have a valid g0.5, we can calculate a per thread scratch offset
 * using the system registers. The problem is we do not have a good way to know
 * the offset from GSBA. The system routine will have to be hardcoded or
 * dynamically patched with the correct offset.
 * TID is in sr0.0[2:0]
 * EUID is in sr0.0[11:8]
 */

#ifdef GPGPU
	/*
	 * Build (sr0.0 >> 5) + (sr0.0 & 7) and scale it by PER_THREAD_QWORDS.
	 * With the bit positions given above this is EUID * 8 + TID.
	 * g0.8:uw and g0.9:uw are presumably the low and high words of g0.4:ud
	 * (TODO confirm the assembler's sub-register numbering).
	 * NOTE(review): the result lands in g0.4, while SET_OFFSET reads the
	 * scratch base from g0.5 -- confirm this is the intended register.
	 */
	mov (1) g0.4:ud 0:ud FLAGS;
#if 0
	/* This should work according to the docs, the add blows up */
	shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
	add (1) g0.16:ub gr0.16:ub sr0.0:ub FLAGS;
#else
	shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
	mov (1) g0.9:uw sr0.0:uw FLAGS;
	and (1) g0.9:uw g0.9:uw 0x7:uw FLAGS;
	add (1) g0.8:uw g0.8:uw g0.9:uw FLAGS;
	/* g0.9 was only a temporary; clear it before the dword multiply. */
	mov (1) g0.9:uw 0:uw FLAGS;
	mul (1) g0.4:ud g0.4:ud PER_THREAD_QWORDS FLAGS;
#endif
#endif

	/* Start from an all-zero header; SET_OFFSET fills in dwords 2 and 5. */
	mov (8) m0:ud 0:ud FLAGS;

	/* Saves must occur in order so as not to clobber the next register */
	/* m0 is the header here, so this stores m1 at offset 0. */
	STORE_MRF(m0, 0);
	/* STORE_GRF stages data in m1, which is safe now that m1 is saved. */
	STORE_GRF(g0, 0x20);
	STORE_GRF(g1, 0x22);
	SAVE_ALL_MRF;

	/* g1 is saved, so it can be used to stage the values written below. */
	mov (8) g1:ud STATE_EU_MSG:ud FLAGS;
	STORE_GRF(g1, STATE_QWORD);

	mov (8) g1:ud DEBUG_PROTOCOL_VERSION:ud FLAGS;
	STORE_GRF(g1, COMMUNICATION_QWORD);

	SAVE_CRF;

	/*
	 * Evict the cache, then block on notification register n1, then evict
	 * again.  Presumably the first eviction makes the saved state visible
	 * to the host debugger and n1 is signalled when the thread may resume
	 * (TODO confirm against the debugger side).
	 */
	EVICT_CACHE;
	wait n1:ud;
	EVICT_CACHE;

	/* Using this to try to keep coherency */
	/* The values read here are discarded; g1 is reloaded further down. */
	LOAD_GRF(g1, CR_OFFSET);
	LOAD_GRF(g1, COMMUNICATION_QWORD);
	LOAD_GRF(g1, STATE_QWORD);

	/*
	 * Restore in the reverse order of the save.  RESTORE_ALL_MRF uses g1 as
	 * a temporary, so g1 is reloaded after it.  g0 goes last of all because
	 * every LOAD_GRF reads g0.5 while building its header.
	 */
	RESTORE_ALL_MRF;
	LOAD_GRF(g1, 0x22);
	LOAD_GRF(g0, 0x20);

	/* Clear breakpoint status */
	and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;

	/* set breakpoint suppress this should be conditional on bes */
	or (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;

	/*
	 * Clear ME_STATE_CTRL together with the SPF_EN and ACC_DIS bits set on
	 * entry.  Presumably clearing ME_STATE_CTRL is what hands control back
	 * to the interrupted thread (TODO confirm in the hardware docs).
	 */
	and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
	nop;