gem_stress: split out gen3 render copy

Header conflicts with other headers for different generations.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Daniel Vetter 2011-03-31 23:29:25 +02:00
parent 1baab2e43a
commit a6751bac8d
4 changed files with 292 additions and 269 deletions

View File

@ -1,7 +1,12 @@
bin_PROGRAMS = \
noinst_PROGRAMS = \
gem_stress \
$(NULL)
gem_stress_SOURCES = \
gem_stress.c \
gem_stress.h \
gem_stress_i915.c
TESTS = getversion \
getclient \
getstats \

View File

@ -49,25 +49,7 @@
* In short: designed for maximum evilness.
*/
#include <stdlib.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <getopt.h>
#include "drm.h"
#include "i915_drm.h"
#include "drmtest.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_gpu_tools.h"
#include "i915_reg.h"
#include "i915_3d.h"
#include "gem_stress.h"
#define CMD_POLY_STIPPLE_OFFSET 0x7906
@ -92,36 +74,15 @@ static uint64_t gem_aperture_size(int fd)
return aperture.aper_size;
}
struct scratch_buf {
drm_intel_bo *bo;
uint32_t stride;
uint32_t tiling;
uint32_t *data;
unsigned num_tiles;
};
static drm_intel_bufmgr *bufmgr;
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
static int drm_fd;
static int devid;
static int num_fences;
int drm_fd;
int devid;
int num_fences;
drm_intel_bo *busy_bo;
static struct {
unsigned scratch_buf_size;
unsigned num_buffers;
int trace_tile;
int no_hw;
int gpu_busy_load;
int use_render;
} options;
#define MAX_BUFS 4096
#define SCRATCH_BUF_SIZE 1024*1024
#define BUSY_BUF_SIZE (256*4096)
#define TILE_SIZE 16
#define TILE_BYTES (TILE_SIZE*TILE_SIZE*sizeof(uint32_t))
struct option_struct options;
static struct scratch_buf buffers[2][MAX_BUFS];
/* tile i is at logical position tile_permutation[i] */
@ -133,7 +94,7 @@ static unsigned num_total_tiles = 0;
#define TILES_PER_BUF (num_total_tiles / num_buffers)
static int fence_storm = 0;
int fence_storm = 0;
static int gpu_busy_load = 10;
static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigned *y)
@ -145,7 +106,7 @@ static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigne
/* All this gem trashing wastes too much cpu time, so give the gpu something to
* do to increase changes for races. */
static void keep_gpu_busy(void)
void keep_gpu_busy(void)
{
uint32_t src_pitch, dst_pitch, cmd_bits;
int tmp;
@ -318,227 +279,6 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s
}
}
static unsigned buf_width(struct scratch_buf *buf)
{
return buf->stride/sizeof(uint32_t);
}
static unsigned buf_height(struct scratch_buf *buf)
{
return options.scratch_buf_size/buf->stride;
}
static void emit_vertex(float f)
{
union { float f; uint32_t ui; } u;
u.f = f;
OUT_BATCH(u.ui);
}
static void emit_vertex_normalized(float f, float total)
{
union { float f; uint32_t ui; } u;
u.f = f / total;
OUT_BATCH(u.ui);
}
static void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
unsigned logical_tile_no)
{
uint32_t src_pitch, dst_pitch, cmd_bits;
src_pitch = src->stride;
dst_pitch = dst->stride;
cmd_bits = 0;
static unsigned keep_gpu_busy_counter = 0;
/* check both edges of the fence usage */
if (keep_gpu_busy_counter & 1 && !fence_storm)
keep_gpu_busy();
/* invariant state */
{
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
IAB_MODIFY_ENABLE |
IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
IAB_SRC_FACTOR_SHIFT) |
IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
IAB_DST_FACTOR_SHIFT));
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) |
CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
ENABLE_POINT_RASTER_RULE |
OGL_POINT_RASTER_RULE |
ENABLE_LINE_STRIP_PROVOKE_VRTX |
ENABLE_TRI_FAN_PROVOKE_VRTX |
LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
OUT_BATCH(_3DSTATE_MODES_4_CMD |
ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE |
S4_VFMT_XY);
OUT_BATCH(0x00000000); /* Stencil. */
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_STIPPLE);
OUT_BATCH(0x00000000);
OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
}
/* samler state */
{
#define TEX_COUNT 1
uint32_t tiling_bits = 0;
if (src->tiling != I915_TILING_NONE)
tiling_bits = MS3_TILED_SURFACE;
if (src->tiling == I915_TILING_Y)
tiling_bits |= MS3_TILE_WALK;
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
OUT_BATCH((1 << TEX_COUNT) - 1);
OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 |
tiling_bits |
(buf_height(src) - 1) << MS3_HEIGHT_SHIFT |
(buf_width(src) - 1) << MS3_WIDTH_SHIFT);
OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT);
OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
OUT_BATCH((1 << TEX_COUNT) - 1);
OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
OUT_BATCH(SS3_NORMALIZED_COORDS |
TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
0 << SS3_TEXTUREMAP_INDEX_SHIFT);
OUT_BATCH(0x00000000);
}
/* render target state */
{
uint32_t tiling_bits = 0;
if (dst->tiling != I915_TILING_NONE)
tiling_bits = BUF_3D_TILED_SURFACE;
if (dst->tiling == I915_TILING_Y)
tiling_bits |= BUF_3D_TILE_WALK_Y;
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
BUF_3D_PITCH(dst->stride));
OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(COLR_BUF_ARGB8888 |
DSTORG_HORT_BIAS(0x8) |
DSTORG_VERT_BIAS(0x8));
/* draw rect is unconditional */
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(0x00000000);
OUT_BATCH(0x00000000); /* ymin, xmin */
OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) |
DRAW_XMAX(buf_width(dst) - 1));
/* yorig, xorig (relate to color buffer?) */
OUT_BATCH(0x00000000);
}
/* texfmt */
{
uint32_t ss2 = ~0;
ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
OUT_BATCH(ss2);
OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
OUT_BATCH(0); /* no vbo */
OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) |
(4 << S1_VERTEX_PITCH_SHIFT));
}
/* frage shader */
{
OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
/* decl FS_T0 */
OUT_BATCH(D0_DCL |
REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
REG_NR(FS_T0) << D0_NR_SHIFT |
((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
OUT_BATCH(0);
OUT_BATCH(0);
/* decl FS_S0 */
OUT_BATCH(D0_DCL |
(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
(REG_NR(FS_S0) << D0_NR_SHIFT) |
((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
OUT_BATCH(0);
OUT_BATCH(0);
/* texld(FS_OC, FS_S0, FS_T0 */
OUT_BATCH(T0_TEXLD |
(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
OUT_BATCH(0);
}
OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1));
emit_vertex(dst_x + TILE_SIZE);
emit_vertex(dst_y + TILE_SIZE);
emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src));
emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
emit_vertex(dst_x);
emit_vertex(dst_y + TILE_SIZE);
emit_vertex_normalized(src_x, buf_width(src));
emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
emit_vertex(dst_x);
emit_vertex(dst_y);
emit_vertex_normalized(src_x, buf_width(src));
emit_vertex_normalized(src_y, buf_height(src));
if (!(keep_gpu_busy_counter & 1) && !fence_storm)
keep_gpu_busy();
keep_gpu_busy_counter++;
intel_batchbuffer_flush(batch);
}
static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
unsigned logical_tile_no)

81
tests/gem_stress.h Normal file
View File

@ -0,0 +1,81 @@
#include <stdlib.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <getopt.h>
#include "drm.h"
#include "i915_drm.h"
#include "drmtest.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_gpu_tools.h"
struct scratch_buf {
drm_intel_bo *bo;
uint32_t stride;
uint32_t tiling;
uint32_t *data;
unsigned num_tiles;
};
struct option_struct {
unsigned scratch_buf_size;
unsigned num_buffers;
int trace_tile;
int no_hw;
int gpu_busy_load;
int use_render;
};
extern struct option_struct options;
extern drm_intel_bufmgr *bufmgr;
extern struct intel_batchbuffer *batch;
extern int drm_fd;
extern int devid;
extern int fence_storm;
#define MAX_BUFS 4096
#define SCRATCH_BUF_SIZE 1024*1024
#define BUSY_BUF_SIZE (256*4096)
#define TILE_SIZE 16
#define TILE_BYTES (TILE_SIZE*TILE_SIZE*sizeof(uint32_t))
void keep_gpu_busy(void);
static inline void emit_vertex(float f)
{
union { float f; uint32_t ui; } u;
u.f = f;
OUT_BATCH(u.ui);
}
static inline void emit_vertex_normalized(float f, float total)
{
union { float f; uint32_t ui; } u;
u.f = f / total;
OUT_BATCH(u.ui);
}
static inline unsigned buf_width(struct scratch_buf *buf)
{
return buf->stride/sizeof(uint32_t);
}
static inline unsigned buf_height(struct scratch_buf *buf)
{
return options.scratch_buf_size/buf->stride;
}
void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
unsigned logical_tile_no);
void gen2_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
unsigned logical_tile_no);

197
tests/gem_stress_i915.c Normal file
View File

@ -0,0 +1,197 @@
#include "i915_reg.h"
#include "i915_3d.h"
#include "gem_stress.h"
void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
unsigned logical_tile_no)
{
static unsigned keep_gpu_busy_counter = 0;
/* check both edges of the fence usage */
if (keep_gpu_busy_counter & 1 && !fence_storm)
keep_gpu_busy();
/* invariant state */
{
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
IAB_MODIFY_ENABLE |
IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
IAB_SRC_FACTOR_SHIFT) |
IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
IAB_DST_FACTOR_SHIFT));
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) |
CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
ENABLE_POINT_RASTER_RULE |
OGL_POINT_RASTER_RULE |
ENABLE_LINE_STRIP_PROVOKE_VRTX |
ENABLE_TRI_FAN_PROVOKE_VRTX |
LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
OUT_BATCH(_3DSTATE_MODES_4_CMD |
ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE |
S4_VFMT_XY);
OUT_BATCH(0x00000000); /* Stencil. */
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_STIPPLE);
OUT_BATCH(0x00000000);
OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
}
/* samler state */
{
#define TEX_COUNT 1
uint32_t tiling_bits = 0;
if (src->tiling != I915_TILING_NONE)
tiling_bits = MS3_TILED_SURFACE;
if (src->tiling == I915_TILING_Y)
tiling_bits |= MS3_TILE_WALK;
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
OUT_BATCH((1 << TEX_COUNT) - 1);
OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 |
tiling_bits |
(buf_height(src) - 1) << MS3_HEIGHT_SHIFT |
(buf_width(src) - 1) << MS3_WIDTH_SHIFT);
OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT);
OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
OUT_BATCH((1 << TEX_COUNT) - 1);
OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
OUT_BATCH(SS3_NORMALIZED_COORDS |
TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
0 << SS3_TEXTUREMAP_INDEX_SHIFT);
OUT_BATCH(0x00000000);
}
/* render target state */
{
uint32_t tiling_bits = 0;
if (dst->tiling != I915_TILING_NONE)
tiling_bits = BUF_3D_TILED_SURFACE;
if (dst->tiling == I915_TILING_Y)
tiling_bits |= BUF_3D_TILE_WALK_Y;
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
BUF_3D_PITCH(dst->stride));
OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(COLR_BUF_ARGB8888 |
DSTORG_HORT_BIAS(0x8) |
DSTORG_VERT_BIAS(0x8));
/* draw rect is unconditional */
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(0x00000000);
OUT_BATCH(0x00000000); /* ymin, xmin */
OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) |
DRAW_XMAX(buf_width(dst) - 1));
/* yorig, xorig (relate to color buffer?) */
OUT_BATCH(0x00000000);
}
/* texfmt */
{
uint32_t ss2 = ~0;
ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
OUT_BATCH(ss2);
OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
OUT_BATCH(0); /* no vbo */
OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) |
(4 << S1_VERTEX_PITCH_SHIFT));
}
/* frage shader */
{
OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
/* decl FS_T0 */
OUT_BATCH(D0_DCL |
REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
REG_NR(FS_T0) << D0_NR_SHIFT |
((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
OUT_BATCH(0);
OUT_BATCH(0);
/* decl FS_S0 */
OUT_BATCH(D0_DCL |
(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
(REG_NR(FS_S0) << D0_NR_SHIFT) |
((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
OUT_BATCH(0);
OUT_BATCH(0);
/* texld(FS_OC, FS_S0, FS_T0 */
OUT_BATCH(T0_TEXLD |
(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
OUT_BATCH(0);
}
OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1));
emit_vertex(dst_x + TILE_SIZE);
emit_vertex(dst_y + TILE_SIZE);
emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src));
emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
emit_vertex(dst_x);
emit_vertex(dst_y + TILE_SIZE);
emit_vertex_normalized(src_x, buf_width(src));
emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
emit_vertex(dst_x);
emit_vertex(dst_y);
emit_vertex_normalized(src_x, buf_width(src));
emit_vertex_normalized(src_y, buf_height(src));
if (!(keep_gpu_busy_counter & 1) && !fence_storm)
keep_gpu_busy();
keep_gpu_busy_counter++;
intel_batchbuffer_flush(batch);
}