mirror of
				https://github.com/tiagovignatti/intel-gpu-tools.git
				synced 2025-11-03 19:47:15 +00:00 
			
		
		
		
	rendercopy/bdw: Fix the original implementation
For posterity, I've squashed these commits against Damien's request. rendercopy/gen8: Fix the include guards rendercopy/gen8: Update the 3DSTATE_MULTISAMPLE opcode The opcode has changed in BDW. rendercopy/gen8: Add the VF_TOPOLOGY state The primitive type has moved out of the 3DPRIMITIVE to its own state, VF_TOPOLOGY. rendercopy/gen8: Fixup 3DSTATE_PS Update the state to the latest BSpec, in particular the thread count was using a wrong shift and we were missing kernel2 offset. rendercopy/gen8: Update 3DSTATE_BASE_ADDRESS This state has seen its fields moved around a bit, follow the BSpec. rendercopy/gen8: Allocate 64 VUEs The simulator screams at us if we try to allocate less than that. rendercopy/gen8: Surface states have to be 64 bytes aligned rendercopy/gen8: Vertical/horizontal align 2 does not exist any more So set them to 4. This should not matter with rendercopy (which is not using compressed textures), but it makes the simulator moan. rendercopy/gen8: Make sure the vertex buffer is 8 bytes aligned rendercopy/gen8: Adjust 3DSTATE_VERTEX_BUFFERS for gen8 The address of the buffer is now on 48 bits. Also the size was computed as offset + size where the field is really the size of the buffer itself, not the end address. rendercopy/gen8: Update the SF/SBE states for gen8 gen8 has a few changes around those states and adds new ones, RASTER and SBE_SWIZ. rendercopy/gen8: Add the PS_EXTRA and PS_BLEND states rendercopy/gen8: Fix building with DEBUG_RENDERCOPY defined The forward declaration was missing the final ';'. Let's move the whole function to the top instead. rendercopy/gen8: Update the PS and CONSTANT_PS states rendercopy/gen8: Fix the red channel selection Make it output red. rendercopy/gen8: Update the write -1 shader With the latest assembler changes from Haihao. rendercopy/gen8: Remove blit.g8a There is no diff between this file and blit.g7a. Remove it. 
rendercopy/gen8: Fix the surface relocation offset The surface base address is now at dwords 8/9 so the relocation has to mirror the change. rendercopy/gen8: Add the VF_INSTANCING state Should work without, but doesn't hurt to add it. rendercopy/gen8: Set the Attribute enable field in PS_EXTRA When the SF is set up to output some attributes, the pixel shader also has to be told there are attributes to care about. rendercopy/gen8: Set the force bits to read URB offset/length If we want to override the URB offset/length in the SBE state itself, we need to set the force bits on (new in gen8). Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
This commit is contained in:
		
							parent
							
								
									3f0714a860
								
							
						
					
					
						commit
						91e5897246
					
				@ -1,5 +1,5 @@
 | 
				
			|||||||
#ifndef GEN7_RENDER_H
 | 
					#ifndef GEN8_RENDER_H
 | 
				
			||||||
#define GEN7_RENDER_H
 | 
					#define GEN8_RENDER_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "gen6_render.h"
 | 
					#include "gen6_render.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -13,7 +13,16 @@
 | 
				
			|||||||
#define GEN7_3DSTATE_DEPTH_BUFFER		GEN6_3D(3, 0, 0x05)
 | 
					#define GEN7_3DSTATE_DEPTH_BUFFER		GEN6_3D(3, 0, 0x05)
 | 
				
			||||||
#define GEN7_3DSTATE_STENCIL_BUFFER		GEN6_3D(3, 0, 0x06)
 | 
					#define GEN7_3DSTATE_STENCIL_BUFFER		GEN6_3D(3, 0, 0x06)
 | 
				
			||||||
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER		GEN6_3D(3, 0, 0x07)
 | 
					#define GEN7_3DSTATE_HIER_DEPTH_BUFFER		GEN6_3D(3, 0, 0x07)
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_MULTISAMPLE		GEN6_3D(3, 0, 0x0d)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER		(0 << 4)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT	(1 << 4)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_1			(0 << 1)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_2			(1 << 1)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_4			(2 << 1)
 | 
				
			||||||
 | 
					# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_8			(3 << 1)
 | 
				
			||||||
 | 
					# define GEN9_3DSTATE_MULTISAMPLE_NUMSAMPLES_16			(4 << 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_VF_INSTANCING		GEN6_3D(3, 0, 0x49)
 | 
				
			||||||
#define GEN7_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
 | 
					#define GEN7_3DSTATE_GS				GEN6_3D(3, 0, 0x11)
 | 
				
			||||||
#define GEN7_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
 | 
					#define GEN7_3DSTATE_CONSTANT_GS		GEN6_3D(3, 0, 0x16)
 | 
				
			||||||
#define GEN7_3DSTATE_CONSTANT_HS		GEN6_3D(3, 0, 0x19)
 | 
					#define GEN7_3DSTATE_CONSTANT_HS		GEN6_3D(3, 0, 0x19)
 | 
				
			||||||
@ -23,9 +32,24 @@
 | 
				
			|||||||
#define GEN7_3DSTATE_DS				GEN6_3D(3, 0, 0x1d)
 | 
					#define GEN7_3DSTATE_DS				GEN6_3D(3, 0, 0x1d)
 | 
				
			||||||
#define GEN7_3DSTATE_STREAMOUT			GEN6_3D(3, 0, 0x1e)
 | 
					#define GEN7_3DSTATE_STREAMOUT			GEN6_3D(3, 0, 0x1e)
 | 
				
			||||||
#define GEN7_3DSTATE_SBE			GEN6_3D(3, 0, 0x1f)
 | 
					#define GEN7_3DSTATE_SBE			GEN6_3D(3, 0, 0x1f)
 | 
				
			||||||
 | 
					# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH	(1 << 29)
 | 
				
			||||||
 | 
					# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET	(1 << 28)
 | 
				
			||||||
 | 
					# define GEN7_SBE_NUM_OUTPUTS_SHIFT             22
 | 
				
			||||||
 | 
					# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT   11
 | 
				
			||||||
 | 
					# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT   5
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_SBE_SWIZ			GEN6_3D(3, 0, 0x51)
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_RASTER			GEN6_3D(3, 0, 0x50)
 | 
				
			||||||
 | 
					# define GEN8_RASTER_FRONT_WINDING_CCW			(1 << 21)
 | 
				
			||||||
 | 
					# define GEN8_RASTER_CULL_NONE                          (1 << 16)
 | 
				
			||||||
#define GEN7_3DSTATE_PS				GEN6_3D(3, 0, 0x20)
 | 
					#define GEN7_3DSTATE_PS				GEN6_3D(3, 0, 0x20)
 | 
				
			||||||
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	\
 | 
					#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	\
 | 
				
			||||||
						GEN6_3D(3, 0, 0x21)
 | 
											GEN6_3D(3, 0, 0x21)
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_PS_BLEND			GEN6_3D(3, 0, 0x4d)
 | 
				
			||||||
 | 
					# define GEN8_PS_BLEND_HAS_WRITEABLE_RT			(1 << 30)
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_PS_EXTRA			GEN6_3D(3,0, 0x4f)
 | 
				
			||||||
 | 
					# define GEN8_PSX_PIXEL_SHADER_VALID			(1 << 31)
 | 
				
			||||||
 | 
					# define GEN8_PSX_ATTRIBUTE_ENABLE			(1 << 8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GEN6_3D(3, 0, 0x23)
 | 
					#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GEN6_3D(3, 0, 0x23)
 | 
				
			||||||
#define GEN7_3DSTATE_BLEND_STATE_POINTERS	GEN6_3D(3, 0, 0x24)
 | 
					#define GEN7_3DSTATE_BLEND_STATE_POINTERS	GEN6_3D(3, 0, 0x24)
 | 
				
			||||||
#define GEN7_3DSTATE_DS_STATE_POINTERS		GEN6_3D(3, 0, 0x25)
 | 
					#define GEN7_3DSTATE_DS_STATE_POINTERS		GEN6_3D(3, 0, 0x25)
 | 
				
			||||||
@ -41,6 +65,8 @@
 | 
				
			|||||||
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS	GEN6_3D(3, 0, 0x2e)
 | 
					#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS	GEN6_3D(3, 0, 0x2e)
 | 
				
			||||||
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS	GEN6_3D(3, 0, 0x2f)
 | 
					#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS	GEN6_3D(3, 0, 0x2f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GEN8_3DSTATE_VF_TOPOLOGY		GEN6_3D(3, 0, 0x4b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GEN6_3D(3, 1, 0x12)
 | 
					#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GEN6_3D(3, 1, 0x12)
 | 
				
			||||||
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GEN6_3D(3, 1, 0x13)
 | 
					#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GEN6_3D(3, 1, 0x13)
 | 
				
			||||||
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GEN6_3D(3, 1, 0x14)
 | 
					#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GEN6_3D(3, 1, 0x14)
 | 
				
			||||||
@ -49,13 +75,11 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/* Some random bits that we care about */
 | 
					/* Some random bits that we care about */
 | 
				
			||||||
#define GEN7_VB0_BUFFER_ADDR_MOD_EN		(1 << 14)
 | 
					#define GEN7_VB0_BUFFER_ADDR_MOD_EN		(1 << 14)
 | 
				
			||||||
#define GEN7_WM_DISPATCH_ENABLE			(1 << 29)
 | 
					 | 
				
			||||||
#define GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
 | 
					#define GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
 | 
				
			||||||
#define GEN7_3DSTATE_PS_ATTRIBUTE_ENABLED	 (1 << 10)
 | 
					#define GEN7_3DSTATE_PS_ATTRIBUTE_ENABLED	 (1 << 10)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Random shifts */
 | 
					/* Random shifts */
 | 
				
			||||||
#define GEN7_3DSTATE_WM_MAX_THREADS_SHIFT 24
 | 
					#define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
 | 
				
			||||||
#define HSW_3DSTATE_WM_MAX_THREADS_SHIFT 23
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Shamelessly ripped from mesa */
 | 
					/* Shamelessly ripped from mesa */
 | 
				
			||||||
struct gen8_surface_state
 | 
					struct gen8_surface_state
 | 
				
			||||||
 | 
				
			|||||||
@ -7,7 +7,13 @@
 | 
				
			|||||||
#define VERTEX_SIZE (3*4)
 | 
					#define VERTEX_SIZE (3*4)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if DEBUG_RENDERCPY
 | 
					#if DEBUG_RENDERCPY
 | 
				
			||||||
static void dump_batch(struct intel_batchbuffer *batch)
 | 
					static void dump_batch(struct intel_batchbuffer *batch) {
 | 
				
			||||||
 | 
						int fd = open("/tmp/i965-batchbuffers.dump", O_WRONLY | O_CREAT,  0666);
 | 
				
			||||||
 | 
						if (fd != -1) {
 | 
				
			||||||
 | 
							write(fd, batch->buffer, 4096);
 | 
				
			||||||
 | 
							fd = close(fd);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#define dump_batch(x) do { } while(0)
 | 
					#define dump_batch(x) do { } while(0)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@ -33,15 +39,15 @@ static const uint32_t ps_kernel[][4] = {
 | 
				
			|||||||
   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
 | 
					   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
   /* Write all -1 */
 | 
					   /* Write all -1 */
 | 
				
			||||||
   { 0x00600001, 0x2e000061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2e200061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2e400061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2e600061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2e800061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2ea00061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2ec00061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x00600001, 0x2ee00061, 0x00000000, 0x3f800000 },
 | 
					   { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
 | 
				
			||||||
   { 0x05800031, 0x20001e3c, 0x00000e00, 0x90031000 },
 | 
					   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -107,10 +113,12 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct scratch_buf *buf,
 | 
				
			|||||||
		read_domain = I915_GEM_DOMAIN_SAMPLER;
 | 
							read_domain = I915_GEM_DOMAIN_SAMPLER;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ss = batch_alloc(batch, sizeof(*ss), 32);
 | 
						ss = batch_alloc(batch, sizeof(*ss), 64);
 | 
				
			||||||
	ss->ss0.surface_type = GEN6_SURFACE_2D;
 | 
						ss->ss0.surface_type = GEN6_SURFACE_2D;
 | 
				
			||||||
	ss->ss0.surface_format = format;
 | 
						ss->ss0.surface_format = format;
 | 
				
			||||||
	ss->ss0.render_cache_read_write = 1;
 | 
						ss->ss0.render_cache_read_write = 1;
 | 
				
			||||||
 | 
						ss->ss0.vertical_alignment = 1; /* align 4 */
 | 
				
			||||||
 | 
						ss->ss0.horizontal_alignment = 1; /* align 4 */
 | 
				
			||||||
	if (buf->tiling == I915_TILING_X)
 | 
						if (buf->tiling == I915_TILING_X)
 | 
				
			||||||
		ss->ss0.tiled_mode = 2;
 | 
							ss->ss0.tiled_mode = 2;
 | 
				
			||||||
	else if (buf->tiling == I915_TILING_Y)
 | 
						else if (buf->tiling == I915_TILING_Y)
 | 
				
			||||||
@ -119,7 +127,7 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct scratch_buf *buf,
 | 
				
			|||||||
	ss->ss8.base_addr = buf->bo->offset;
 | 
						ss->ss8.base_addr = buf->bo->offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = drm_intel_bo_emit_reloc(batch->bo,
 | 
						ret = drm_intel_bo_emit_reloc(batch->bo,
 | 
				
			||||||
				      batch_offset(batch, ss) + 4,
 | 
									      batch_offset(batch, ss) + 8 * 4,
 | 
				
			||||||
				      buf->bo, 0,
 | 
									      buf->bo, 0,
 | 
				
			||||||
				      read_domain, write_domain);
 | 
									      read_domain, write_domain);
 | 
				
			||||||
	assert(ret == 0);
 | 
						assert(ret == 0);
 | 
				
			||||||
@ -128,7 +136,7 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct scratch_buf *buf,
 | 
				
			|||||||
	ss->ss2.width  = buf_width(buf) - 1;
 | 
						ss->ss2.width  = buf_width(buf) - 1;
 | 
				
			||||||
	ss->ss3.pitch  = buf->stride - 1;
 | 
						ss->ss3.pitch  = buf->stride - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ss->ss7.shader_chanel_select_a = 4;
 | 
						ss->ss7.shader_chanel_select_r = 4;
 | 
				
			||||||
	ss->ss7.shader_chanel_select_g = 5;
 | 
						ss->ss7.shader_chanel_select_g = 5;
 | 
				
			||||||
	ss->ss7.shader_chanel_select_b = 6;
 | 
						ss->ss7.shader_chanel_select_b = 6;
 | 
				
			||||||
	ss->ss7.shader_chanel_select_a = 7;
 | 
						ss->ss7.shader_chanel_select_a = 7;
 | 
				
			||||||
@ -190,6 +198,7 @@ gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
 | 
				
			|||||||
			     uint32_t width, uint32_t height) {
 | 
								     uint32_t width, uint32_t height) {
 | 
				
			||||||
	void *ret;
 | 
						void *ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						batch_align(batch, 8);
 | 
				
			||||||
	ret = batch->ptr;
 | 
						ret = batch->ptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	emit_vertex_2s(batch, dst_x + width, dst_y + height);
 | 
						emit_vertex_2s(batch, dst_x + width, dst_y + height);
 | 
				
			||||||
@ -272,14 +281,13 @@ gen6_emit_vertex_elements(struct intel_batchbuffer *batch) {
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 | 
					static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 | 
				
			||||||
				    uint32_t offset) {
 | 
									    uint32_t offset) {
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | (4 * 1 - 1));
 | 
						OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | (1 + (4 * 1) - 2));
 | 
				
			||||||
	OUT_BATCH(0 << VB0_BUFFER_INDEX_SHIFT | /* VB 0th index */
 | 
						OUT_BATCH(0 << VB0_BUFFER_INDEX_SHIFT | /* VB 0th index */
 | 
				
			||||||
		  VB0_VERTEXDATA |
 | 
					 | 
				
			||||||
		  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
 | 
							  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
 | 
				
			||||||
		  VERTEX_SIZE << VB0_BUFFER_PITCH_SHIFT);
 | 
							  VERTEX_SIZE << VB0_BUFFER_PITCH_SHIFT);
 | 
				
			||||||
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
 | 
						OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
 | 
				
			||||||
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, offset + (VERTEX_SIZE * 3) - 1);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
						OUT_BATCH(3 * VERTEX_SIZE);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static uint32_t
 | 
					static uint32_t
 | 
				
			||||||
@ -361,30 +369,47 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
gen7_emit_state_base_address(struct intel_batchbuffer *batch) {
 | 
					gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
 | 
				
			||||||
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
 | 
						OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
 | 
				
			||||||
	/* general (stateless) */
 | 
					
 | 
				
			||||||
	/* surface */
 | 
						/* general */
 | 
				
			||||||
	/* instruction */
 | 
					 | 
				
			||||||
	/* indirect */
 | 
					 | 
				
			||||||
	/* dynamic */
 | 
					 | 
				
			||||||
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
						OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* stateless data port */
 | 
				
			||||||
 | 
						OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* surface */
 | 
				
			||||||
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 | 
						OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* dynamic */
 | 
				
			||||||
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
 | 
						OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
 | 
				
			||||||
		  0, BASE_ADDRESS_MODIFY);
 | 
							  0, BASE_ADDRESS_MODIFY);
 | 
				
			||||||
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
						/* indirect */
 | 
				
			||||||
	OUT_BATCH(0xfffff000 | BASE_ADDRESS_MODIFY); // copied from mesa
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 | 
					
 | 
				
			||||||
 | 
						/* instruction */
 | 
				
			||||||
 | 
						OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* general state buffer size */
 | 
				
			||||||
 | 
						OUT_BATCH(0xfffff000 | 1);
 | 
				
			||||||
 | 
						/* dynamic state buffer size */
 | 
				
			||||||
 | 
						OUT_BATCH(1 << 12 | 1);
 | 
				
			||||||
 | 
						/* indirect object buffer size */
 | 
				
			||||||
 | 
						OUT_BATCH(0xfffff000 | 1);
 | 
				
			||||||
 | 
						/* instruction buffer size */
 | 
				
			||||||
 | 
						OUT_BATCH(1 << 12);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
gen7_emit_urb(struct intel_batchbuffer *batch) {
 | 
					gen7_emit_urb(struct intel_batchbuffer *batch) {
 | 
				
			||||||
	/* XXX: Min valid values from mesa */
 | 
						/* XXX: Min valid values from mesa */
 | 
				
			||||||
	const int vs_entries = 32;
 | 
						const int vs_entries = 64;
 | 
				
			||||||
	const int vs_size = 2;
 | 
						const int vs_size = 2;
 | 
				
			||||||
	const int vs_start = 2;
 | 
						const int vs_start = 2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -408,8 +433,8 @@ gen8_emit_cc(struct intel_batchbuffer *batch) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
gen7_emit_multisample(struct intel_batchbuffer *batch) {
 | 
					gen8_emit_multisample(struct intel_batchbuffer *batch) {
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | 2);
 | 
						OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | 2);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
@ -537,49 +562,52 @@ gen7_emit_clip(struct intel_batchbuffer *batch) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
gen7_emit_sf(struct intel_batchbuffer *batch) {
 | 
					gen8_emit_sf(struct intel_batchbuffer *batch)
 | 
				
			||||||
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
 | 
					{
 | 
				
			||||||
#ifdef GPU_HANG
 | 
						int i;
 | 
				
			||||||
	OUT_BATCH(0 << 22 | 1 << 11 | 1 << 4);
 | 
					
 | 
				
			||||||
#else
 | 
						OUT_BATCH(GEN7_3DSTATE_SBE | (4 - 2));
 | 
				
			||||||
	OUT_BATCH(1 << 22 | 1 << 11 | 1 << 4);
 | 
						OUT_BATCH(1 << GEN7_SBE_NUM_OUTPUTS_SHIFT |
 | 
				
			||||||
#endif
 | 
							  GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
 | 
				
			||||||
	OUT_BATCH(0);
 | 
							  GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET |
 | 
				
			||||||
	OUT_BATCH(0);
 | 
							  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
 | 
				
			||||||
	OUT_BATCH(0);
 | 
							  1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
 | 
				
			||||||
 | 
						for (i = 0; i < 8; i++)
 | 
				
			||||||
		OUT_BATCH(0);
 | 
							OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2));
 | 
						OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2));
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_RASTER_FRONT_WINDING_CCW | GEN8_RASTER_CULL_NONE);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
 | 
					 | 
				
			||||||
//	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
 | 
					 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2));
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
 | 
					gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
 | 
				
			||||||
	const int max_threads = 86;
 | 
						const int max_threads = 63;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2));
 | 
						OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2));
 | 
				
			||||||
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
 | 
						OUT_BATCH(/* XXX: I don't understand the BARYCENTRIC stuff, but it
 | 
				
			||||||
		  /* XXX: I don't understand the BARYCENTRIC stuff, but it
 | 
					 | 
				
			||||||
		   * appears we need it to put our setup data in the place we
 | 
							   * appears we need it to put our setup data in the place we
 | 
				
			||||||
		   * expect (g6, see below) */
 | 
							   * expect (g6, see below) */
 | 
				
			||||||
		  GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC);
 | 
							  GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7-2));
 | 
						OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (11-2));
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
@ -587,19 +615,26 @@ gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
 | 
				
			|||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN7_3DSTATE_PS | (10-2));
 | 
						OUT_BATCH(GEN7_3DSTATE_PS | (12-2));
 | 
				
			||||||
	OUT_BATCH(kernel);
 | 
						OUT_BATCH(kernel);
 | 
				
			||||||
	OUT_BATCH(0); /* kernel hi */
 | 
						OUT_BATCH(0); /* kernel hi */
 | 
				
			||||||
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF |
 | 
						OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF |
 | 
				
			||||||
		  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 | 
							  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 | 
				
			||||||
	OUT_BATCH(0); /* scratch space stuff */
 | 
						OUT_BATCH(0); /* scratch space stuff */
 | 
				
			||||||
	OUT_BATCH(0); /* scratch hi */
 | 
						OUT_BATCH(0); /* scratch hi */
 | 
				
			||||||
	OUT_BATCH((max_threads - 1) << GEN7_3DSTATE_WM_MAX_THREADS_SHIFT |
 | 
						OUT_BATCH((max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT |
 | 
				
			||||||
		  GEN7_3DSTATE_PS_ATTRIBUTE_ENABLED |
 | 
					 | 
				
			||||||
		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
 | 
							  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
 | 
				
			||||||
	OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
 | 
						OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
 | 
				
			||||||
	OUT_BATCH(0); // kernel 1
 | 
						OUT_BATCH(0); // kernel 1
 | 
				
			||||||
	OUT_BATCH(0); /* kernel 1 hi */
 | 
						OUT_BATCH(0); /* kernel 1 hi */
 | 
				
			||||||
 | 
						OUT_BATCH(0); // kernel 2
 | 
				
			||||||
 | 
						OUT_BATCH(0); /* kernel 2 hi */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
@ -637,11 +672,21 @@ gen6_emit_drawing_rectangle(struct intel_batchbuffer *batch, struct scratch_buf
 | 
				
			|||||||
	OUT_BATCH(0);
 | 
						OUT_BATCH(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Vertex elements MUST be defined before this according to spec */
 | 
					static void gen8_emit_vf_topology(struct intel_batchbuffer *batch)
 | 
				
			||||||
static void gen7_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	OUT_BATCH(GEN6_3DPRIMITIVE | (7-2));
 | 
						OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY);
 | 
				
			||||||
	OUT_BATCH(_3DPRIM_RECTLIST);
 | 
						OUT_BATCH(_3DPRIM_RECTLIST);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Vertex elements MUST be defined before this according to spec */
 | 
				
			||||||
 | 
					static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
						OUT_BATCH(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						OUT_BATCH(GEN6_3DPRIMITIVE | (7-2));
 | 
				
			||||||
 | 
						OUT_BATCH(0);	/* gen8+ ignore the topology type field */
 | 
				
			||||||
	OUT_BATCH(3);	/* vertex count */
 | 
						OUT_BATCH(3);	/* vertex count */
 | 
				
			||||||
	OUT_BATCH(0);	/*  We're specifying this instead with offset in GEN6_3DSTATE_VERTEX_BUFFERS */
 | 
						OUT_BATCH(0);	/*  We're specifying this instead with offset in GEN6_3DSTATE_VERTEX_BUFFERS */
 | 
				
			||||||
	OUT_BATCH(1);	/* single instance */
 | 
						OUT_BATCH(1);	/* single instance */
 | 
				
			||||||
@ -719,7 +764,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	gen7_emit_push_constants(batch);
 | 
						gen7_emit_push_constants(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gen7_emit_state_base_address(batch);
 | 
						gen8_emit_state_base_address(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
 | 
						OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
 | 
				
			||||||
	OUT_BATCH(viewport.cc_state);
 | 
						OUT_BATCH(viewport.cc_state);
 | 
				
			||||||
@ -730,7 +775,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	gen8_emit_cc(batch);
 | 
						gen8_emit_cc(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gen7_emit_multisample(batch);
 | 
						gen8_emit_multisample(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gen7_emit_null_state(batch);
 | 
						gen7_emit_null_state(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -740,7 +785,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	gen7_emit_clip(batch);
 | 
						gen7_emit_clip(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gen7_emit_sf(batch);
 | 
						gen8_emit_sf(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
 | 
						OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
 | 
				
			||||||
	OUT_BATCH(ps_binding_table);
 | 
						OUT_BATCH(ps_binding_table);
 | 
				
			||||||
@ -762,7 +807,8 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 | 
				
			|||||||
	gen7_emit_vertex_buffer(batch, vertex_buffer);
 | 
						gen7_emit_vertex_buffer(batch, vertex_buffer);
 | 
				
			||||||
	gen6_emit_vertex_elements(batch);
 | 
						gen6_emit_vertex_elements(batch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gen7_emit_primitive(batch, vertex_buffer);
 | 
						gen8_emit_vf_topology(batch);
 | 
				
			||||||
 | 
						gen8_emit_primitive(batch, vertex_buffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	OUT_BATCH(MI_BATCH_BUFFER_END);
 | 
						OUT_BATCH(MI_BATCH_BUFFER_END);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -774,13 +820,3 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 | 
				
			|||||||
	gen6_render_flush(batch, batch_end);
 | 
						gen6_render_flush(batch, batch_end);
 | 
				
			||||||
	intel_batchbuffer_reset(batch);
 | 
						intel_batchbuffer_reset(batch);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
#if DEBUG_RENDERCPY
 | 
					 | 
				
			||||||
static void dump_batch(struct intel_batchbuffer *batch) {
 | 
					 | 
				
			||||||
	int fd = open("/tmp/i965-batchbuffers.dump", O_WRONLY | O_CREAT,  0666);
 | 
					 | 
				
			||||||
	if (fd != -1) {
 | 
					 | 
				
			||||||
		write(fd, batch->buffer, 4096);
 | 
					 | 
				
			||||||
		fd = close(fd);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,66 +0,0 @@
 | 
				
			|||||||
/* Assemble with  ".../intel-gen4asm/src/intel-gen4asm -g 7" */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Move pixels into g10-g13. The pixel shader does not load what you want. It
 | 
					 | 
				
			||||||
 * loads the input data for a plane function to calculate what you want. The
 | 
					 | 
				
			||||||
 * following is boiler plate code to move our normalized texture coordinates
 | 
					 | 
				
			||||||
 * (u,v) into g10-g13. It does this 4 subspans (16 pixels) at a time.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * This should do the same thing, but it doesn't work for some reason.
 | 
					 | 
				
			||||||
 *   pln(16) g10 g6<0,1,0>F g2<8,8,1>F	{ align1 };
 | 
					 | 
				
			||||||
 *   pln(16) g12 g6.16<1>F g2<8,8,1>F	{ align1 };
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
/* U */
 | 
					 | 
				
			||||||
pln (8) g10<1>F g6.0<0,1,0>F g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
 | 
					 | 
				
			||||||
pln (8) g11<1>F g6.0<0,1,0>F g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
 | 
					 | 
				
			||||||
/* V */
 | 
					 | 
				
			||||||
pln (8) g12<1>F g6.16<0,1,0> g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
 | 
					 | 
				
			||||||
pln (8) g13<1>F g6.16<0,1,0> g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Next we want the sampler to fetch the src texture (i.e. src buffer). This
 | 
					 | 
				
			||||||
 * is done with a pretty simple send message. The output goes to g112, which is
 | 
					 | 
				
			||||||
 * exactly what we're supposed to use in our final send message.
 | 
					 | 
				
			||||||
 * In intel-gen4asm, we should end up parsed by the following rule:
 | 
					 | 
				
			||||||
 *   predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Send message descriptor:
 | 
					 | 
				
			||||||
 * 28:25 = message len = 4 // our 4 registers have 16 pixels
 | 
					 | 
				
			||||||
 * 24:20 = response len = 8 // Each pixel is RGBA32, so we need 8 registers
 | 
					 | 
				
			||||||
 * 19:19 = header present = 0
 | 
					 | 
				
			||||||
 * 18:17 = SIMD16 = 2
 | 
					 | 
				
			||||||
 * 16:12 = TYPE = 0  (regular sample)
 | 
					 | 
				
			||||||
 * 11:08 = Sampler index = ignored/0
 | 
					 | 
				
			||||||
 * 7:0 = binding table index = src = 1
 | 
					 | 
				
			||||||
 * 0x8840001
 | 
					 | 
				
			||||||
 * 
 | 
					 | 
				
			||||||
 * Send message extra descriptor
 | 
					 | 
				
			||||||
 * 5:5 = End of Thread = 0
 | 
					 | 
				
			||||||
 * 3:0 = Target Function ID = SFID_SAMPLER (2)
 | 
					 | 
				
			||||||
 * 0x2
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
send(16) g112 g10 0x2 0x8840001 { align1 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* g112-g119 now contains the sample source input, and all we must do is write
 | 
					 | 
				
			||||||
 * it out to the destination render target. This is done with the send message
 | 
					 | 
				
			||||||
 * as well. The only extra bits are to terminate the pixel shader.
 | 
					 | 
				
			||||||
 * 
 | 
					 | 
				
			||||||
 * Send message descriptor:
 | 
					 | 
				
			||||||
 * 28:25 = message len = 8 // 16 pixels RGBA32
 | 
					 | 
				
			||||||
 * 24:20 = response len = 0
 | 
					 | 
				
			||||||
 * 19:19 = header present = 0
 | 
					 | 
				
			||||||
 * 17:14 = message type = Render Target Write (12)
 | 
					 | 
				
			||||||
 * 12:12 = Last Render Target Select = 1
 | 
					 | 
				
			||||||
 * 10:08 = Message Type = SIMD16 (0)
 | 
					 | 
				
			||||||
 * 07:00 = Binding Table Index = dest = 0
 | 
					 | 
				
			||||||
 * 0x10031000
 | 
					 | 
				
			||||||
 * 
 | 
					 | 
				
			||||||
 * Send message extra descriptor
 | 
					 | 
				
			||||||
 * 5:5 = End of Thread = 1
 | 
					 | 
				
			||||||
 * 3:0 = Target Function ID = SFID_DP_RC (5)
 | 
					 | 
				
			||||||
 * 0x25
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
send(16) null g112  0x25 0x10031000 { align1, EOT };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* vim: set ft=c ts=4 sw=2 tw=80 et: */
 | 
					 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user