diff options
Diffstat (limited to 'src/gallium/drivers')
66 files changed, 1967 insertions, 628 deletions
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index e80067f3b19..3c935792465 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) } struct brw_batchbuffer * -brw_batchbuffer_alloc(struct brw_winsys_screen *sws, - struct brw_chipset chipset) +brw_batchbuffer_alloc(struct brw_winsys_screen *sws) { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); batch->sws = sws; - batch->chipset = chipset; brw_batchbuffer_reset(batch); return batch; diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 6ca9f617f5e..6ecb91857dd 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -26,7 +26,6 @@ struct brw_batchbuffer { struct brw_winsys_screen *sws; struct brw_winsys_buffer *buf; - struct brw_chipset chipset; /** * Values exported to speed up the writing the batchbuffer, @@ -47,8 +46,8 @@ struct brw_batchbuffer { /*@}*/ }; -struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, - struct brw_chipset chipset ); +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws ); + void brw_batchbuffer_free(struct brw_batchbuffer *batch); diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index ccba205e8c7..66b13ea58e2 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; - c.chipset = brw->chipset; c.key = *key; - c.need_ff_sync = c.chipset.is_igdng; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (c.chipset.is_igdng) + if (brw->gen == 5) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw, if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 80e3a11a370..f123b73c063 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -125,12 +125,10 @@ struct brw_clip_compile { GLuint last_tmp; GLboolean need_direction; - struct brw_chipset chipset; GLuint last_mrf; GLuint header_position_offset; - GLboolean need_ff_sync; GLuint nr_color_attrs; GLuint offset_color0; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 66caadc4d53..4ed7362171b 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -32,6 +32,7 @@ #include "util/u_debug.h" #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -41,7 +42,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { GLuint i = 0,j; - + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; @@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_ENDIF(p, is_neg2); } } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 5c3ccfd8d0d..f56edf3177c 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Although up to 16 concurrent Clip threads are allowed on IGDNG, * only 2 threads can output VUEs at a time. */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) clip.thread4.max_threads = 16 - 1; else clip.thread4.max_threads = 2 - 1; @@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_G4X(brw)) + if (brw->is_g4x) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 069524bc14f..7d400e6028b 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -30,6 +30,7 @@ */ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { GLuint i = 0,j; + struct brw_context *brw = c->func.brw; /* Register usage is static, precompute here: */ @@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->key.nr_attrs*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = c->key.nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } - if (c->need_ff_sync) { + if (brw->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (c->chipset.is_965) { + if (p->brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 23e51ee9bcd..5713f25da7c 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -31,6 +31,7 @@ #include "brw_defines.h" +#include "brw_context.h" #include "brw_eu.h" #include "brw_clip.h" @@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg tmp = get_tmp(c); GLuint i; @@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->key.nr_attrs; i++) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; if (delta == c->offset_edgeflag) { @@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (c->chipset.is_igdng) + if (brw->gen == 5) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; struct brw_instruction *need_ff_sync; @@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct brw_context *brw = c->func.brw; + if (brw->needs_ff_sync) { struct brw_compile *p = &c->func; brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index a2736f783d5..41a468a32f0 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, void *priv) { struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - + struct brw_screen *brs = brw_screen(screen); if (!brw) { debug_printf("%s: failed to alloc context\n", __FUNCTION__); return NULL; @@ -117,7 +117,10 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, brw->base.priv = priv; brw->base.destroy = brw_destroy_context; brw->sws = brw_screen(screen)->sws; - brw->chipset = brw_screen(screen)->chipset; + brw->is_g4x = brs->is_g4x; + brw->needs_ff_sync = brs->needs_ff_sync; + brw->has_negative_rhw_bug = brs->has_negative_rhw_bug; + brw->gen = brs->gen; brw_init_resource_functions( brw ); brw_pipe_blend_init( brw ); @@ -145,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, make_empty_list(&brw->query.active_head); - brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset ); + brw->batch = brw_batchbuffer_alloc( brw->sws ); if (brw->batch == NULL) goto fail; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index d927f382d5f..45fc26dd7d8 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -529,7 +529,14 @@ struct brw_query_object { struct brw_context { struct pipe_context base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + + int urb_size; + int vs_max_threads; + int wm_max_threads; struct brw_winsys_screen *sws; @@ -854,11 +861,5 @@ brw_context( struct pipe_context *ctx ) return (struct brw_context *)ctx; } - -#define BRW_IS_965(brw) ((brw)->chipset.is_965) -#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) -#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) - - #endif diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index e201ce4d7ce..7547eae97c7 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -463,6 +463,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -502,6 +509,27 @@ #define BRW_MASK_ENABLE 0 #define BRW_MASK_DISABLE 1 +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + #define BRW_OPCODE_MOV 1 #define BRW_OPCODE_SEL 2 #define BRW_OPCODE_NOT 4 @@ -531,6 +559,8 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_SENDC 50 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -550,6 +580,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 @@ -599,6 +630,8 @@ #define BRW_ARF_NOTIFICATION_COUNT 0x90 #define BRW_ARF_IP 0xA0 +#define BRW_MRF_COMPR4 (1 << 7) + #define BRW_AMASK 0 #define BRW_IMASK 1 #define BRW_LMASK 2 @@ -645,13 +678,14 @@ #define BRW_POLYGON_FACING_BACK 1 #define BRW_MESSAGE_TARGET_NULL 0 -#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ #define BRW_MESSAGE_TARGET_SAMPLER 2 #define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */ +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */ #define BRW_MESSAGE_TARGET_URB 6 #define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 +#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */ #define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 #define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 @@ -674,20 +708,15 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 - -/* for IGDNG only */ +#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6 + +/* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 #define BRW_SAMPLER_SIMD_MODE_SIMD8 1 #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 @@ -705,10 +734,24 @@ #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 #define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 +/* This one stays the same across generations. */ #define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ #define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 #define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 #define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 #define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 @@ -728,6 +771,16 @@ #define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 #define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 +/* GEN6 */ +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14 + #define BRW_MATH_FUNCTION_INV 1 #define BRW_MATH_FUNCTION_LOG 2 #define BRW_MATH_FUNCTION_EXP 3 @@ -736,7 +789,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -787,17 +841,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -816,6 +886,236 @@ #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip aganist. */ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 +# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -827,6 +1127,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 @@ -839,8 +1158,8 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 -#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ - (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ +#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \ + (brw->is_g4x ? 384 : 256)) /* 512 bit units */ diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 28c83515ba9..b093569f0cf 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -49,12 +49,14 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, @@ -69,13 +71,14 @@ struct { [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, @@ -144,7 +147,6 @@ char *chan_sel[4] = { }; char *dest_condmod[16] = { - [0] = NULL }; char *debug_ctrl[2] = { @@ -157,6 +159,16 @@ char *saturate[2] = { [1] = ".sat" }; +char *accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +char *wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + char *exec_size[8] = { [0] = "1", [1] = "2", @@ -204,6 +216,7 @@ char *compr_ctrl[4] = { [0] = "", [1] = "sechalf", [2] = "compr", + [3] = "compr4", }; char *dep_ctrl[4] = { @@ -233,6 +246,16 @@ char *reg_encoding[8] = { [7] = "F" }; +int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + char *imm_encoding[8] = { [0] = "UD", [1] = "D", @@ -321,6 +344,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id) static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) { int err = 0; + + /* Clear the Compr4 instruction compression bit. */ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: @@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) case BRW_ARF_ACCUMULATOR: format (file, "acc%d", _reg_nr & 0x0f); break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; case BRW_ARF_MASK: format (file, "mask%d", _reg_nr & 0x0f); break; @@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) return err; } -static int dest (FILE *file, const struct brw_instruction *inst) +static int dest (FILE *file, struct brw_instruction *inst) { int err = 0; @@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) - format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } @@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) - format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); @@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) - format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); @@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, if (err == -1) return 0; if (sub_reg_num) - format (file, ".%d", sub_reg_num); + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; @@ -588,11 +627,12 @@ static int src_da16 (FILE *file, if (err == -1) return 0; if (_subreg_nr) - format (file, ".%d", _subreg_nr); + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format (file, ".%d", 16 / reg_type_size[_reg_type]); string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); - string (file, ",1,1>"); - err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + string (file, ",4,1>"); /* * Three kinds of swizzle display: * identity - nothing printed @@ -619,11 +659,12 @@ static int src_da16 (FILE *file, err |= control (file, "channel select", chan_sel, swz_z, NULL); err |= control (file, "channel select", chan_sel, swz_w, NULL); } + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); return err; } -static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { switch (type) { case BRW_REGISTER_TYPE_UD: format (file, "0x%08xUD", inst->bits3.ud); @@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { return 0; } -static int src0 (FILE *file, const struct brw_instruction *inst) +static int src0 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src0_reg_type, @@ -712,7 +753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst) } } -static int src1 (FILE *file, const struct brw_instruction *inst) +static int src1 (FILE *file, struct brw_instruction *inst) { if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src1_reg_type, @@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst) } } -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= src1 (file, inst); } - if (inst->header.opcode == BRW_OPCODE_SEND) { + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { newline (file); pad (file, 16); space = 0; @@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) inst->bits3.math.precision, &space); break; case BRW_MESSAGE_TARGET_SAMPLER: - format (file, " (%d, %d, ", - inst->bits3.sampler.binding_table_index, - inst->bits3.sampler.sampler); - err |= control (file, "sampler target format", sampler_target_format, - inst->bits3.sampler.return_format, NULL); - string (file, ")"); + if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_MESSAGE_TARGET_DATAPORT_READ: + if (gen >= 6) { + format (file, " (%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else if (gen >= 5 /* FINISHME: || is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } break; case BRW_MESSAGE_TARGET_DATAPORT_WRITE: - format (file, " (%d, %d, %d, %d)", - inst->bits3.dp_write.binding_table_index, - (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | - inst->bits3.dp_write.msg_control, - inst->bits3.dp_write.msg_type, - inst->bits3.dp_write.send_commit_msg); + if (gen >= 6) { + format (file, " (%d, %d, %d, %d, %d, %d)", + inst->bits3.dp_render_cache.binding_table_index, + inst->bits3.dp_render_cache.msg_control, + inst->bits3.dp_render_cache.msg_type, + inst->bits3.dp_render_cache.send_commit_msg, + inst->bits3.dp_render_cache.msg_length, + inst->bits3.dp_render_cache.response_length); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); - space = 1; + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, @@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) inst->bits3.urb.used, &space); err |= control (file, "urb complete", urb_complete, inst->bits3.urb.complete, &space); + if (gen >= 5) { + format (file, " mlen %d, rlen %d\n", + inst->bits3.urb_gen5.msg_length, + inst->bits3.urb_gen5.response_length); + } break; case BRW_MESSAGE_TARGET_THREAD_SPAWNER: break; @@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) } if (space) string (file, " "); - format (file, "mlen %d", - inst->bits3.generic.msg_length); - format (file, " rlen %d", - inst->bits3.generic.response_length); + if (gen >= 5) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } } pad (file, 64); if (inst->header.opcode != BRW_OPCODE_NOP) { @@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); - if (inst->header.opcode == BRW_OPCODE_SEND) + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) err |= control (file, "end of thread", end_of_thread, inst->bits3.generic.end_of_thread, &space); if (space) @@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count) + struct brw_instruction *inst, + unsigned count, int gen) { int i, err; for (i = 0; i < count; i++) { - err = brw_disasm_insn(stderr, &inst[i]); + err = brw_disasm_insn(stderr, &inst[i], gen); if (err) return err; } diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index ba5b109c483..ce451ed5a06 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -27,10 +27,10 @@ struct brw_instruction; -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); +int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen); int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count); + struct brw_instruction *inst, + unsigned count, int gen); #endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index cf9405470c8..04ec5c81a6b 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -170,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].offset); - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { OUT_RELOC(brw->vb.vb[i].bo, BRW_USAGE_VERTEX, brw->vb.vb[i].bo->size - 1); diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 00d8eaccbc4..ba1159e4c32 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.math_igdng.function = function; - insn->bits3.math_igdng.int_type = integer_type; - insn->bits3.math_igdng.precision = low_precision; - insn->bits3.math_igdng.saturate = saturate; - insn->bits3.math_igdng.data_type = dataType; - insn->bits3.math_igdng.snapshot = 0; - insn->bits3.math_igdng.header_present = 0; - insn->bits3.math_igdng.response_length = response_length; - insn->bits3.math_igdng.msg_length = msg_length; - insn->bits3.math_igdng.end_of_thread = 0; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; - insn->bits2.send_igdng.end_of_thread = 0; + if (brw->gen == 5) { + insn->bits3.math_gen5.function = function; + insn->bits3.math_gen5.int_type = integer_type; + insn->bits3.math_gen5.precision = low_precision; + insn->bits3.math_gen5.saturate = saturate; + insn->bits3.math_gen5.data_type = dataType; + insn->bits3.math_gen5.snapshot = 0; + insn->bits3.math_gen5.header_present = 0; + insn->bits3.math_gen5.response_length = response_length; + insn->bits3.math_gen5.msg_length = msg_length; + insn->bits3.math_gen5.end_of_thread = 0; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_gen5.end_of_thread = 0; } else { insn->bits3.math.function = function; insn->bits3.math.int_type = integer_type; @@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_igdng.opcode = 1; - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + insn->bits3.urb_gen5.opcode = 1; + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } static void brw_set_urb_message( struct brw_context *brw, @@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.urb_igdng.opcode = 0; /* ? */ - insn->bits3.urb_igdng.offset = offset; - insn->bits3.urb_igdng.swizzle_control = swizzle_control; - insn->bits3.urb_igdng.allocate = allocate; - insn->bits3.urb_igdng.used = used; /* ? */ - insn->bits3.urb_igdng.complete = complete; - insn->bits3.urb_igdng.header_present = 1; - insn->bits3.urb_igdng.response_length = response_length; - insn->bits3.urb_igdng.msg_length = msg_length; - insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.urb_gen5.opcode = 0; /* ? */ + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; /* ? */ + insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.header_present = 1; + insn->bits3.urb_gen5.response_length = response_length; + insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.urb.opcode = 0; /* ? */ insn->bits3.urb.offset = offset; @@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_write_igdng.msg_control = msg_control; - insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write_igdng.msg_type = msg_type; - insn->bits3.dp_write_igdng.send_commit_msg = 0; - insn->bits3.dp_write_igdng.header_present = 1; - insn->bits3.dp_write_igdng.response_length = response_length; - insn->bits3.dp_write_igdng.msg_length = msg_length; - insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_write_gen5.msg_control = msg_control; + insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_gen5.msg_type = msg_type; + insn->bits3.dp_write_gen5.send_commit_msg = 0; + insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.response_length = response_length; + insn->bits3.dp_write_gen5.msg_length = msg_length; + insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_write.binding_table_index = binding_table_index; insn->bits3.dp_write.msg_control = msg_control; @@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; - insn->bits3.dp_read_igdng.msg_control = msg_control; - insn->bits3.dp_read_igdng.msg_type = msg_type; - insn->bits3.dp_read_igdng.target_cache = target_cache; - insn->bits3.dp_read_igdng.header_present = 1; - insn->bits3.dp_read_igdng.response_length = response_length; - insn->bits3.dp_read_igdng.msg_length = msg_length; - insn->bits3.dp_read_igdng.pad1 = 0; - insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (brw->gen == 5) { + insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_read_gen5.msg_control = msg_control; + insn->bits3.dp_read_gen5.msg_type = msg_type; + insn->bits3.dp_read_gen5.target_cache = target_cache; + insn->bits3.dp_read_gen5.header_present = 1; + insn->bits3.dp_read_gen5.response_length = response_length; + insn->bits3.dp_read_gen5.msg_length = msg_length; + insn->bits3.dp_read_gen5.pad1 = 0; + insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_gen5.end_of_thread = end_of_thread; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { - insn->bits3.sampler_igdng.binding_table_index = binding_table_index; - insn->bits3.sampler_igdng.sampler = sampler; - insn->bits3.sampler_igdng.msg_type = msg_type; - insn->bits3.sampler_igdng.simd_mode = simd_mode; - insn->bits3.sampler_igdng.header_present = header_present; - insn->bits3.sampler_igdng.response_length = response_length; - insn->bits3.sampler_igdng.msg_length = msg_length; - insn->bits3.sampler_igdng.end_of_thread = eot; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_igdng.end_of_thread = eot; - } else if (BRW_IS_G4X(brw)) { + if (brw->gen == 5) { + insn->bits3.sampler_gen5.binding_table_index = binding_table_index; + insn->bits3.sampler_gen5.sampler = sampler; + insn->bits3.sampler_gen5.msg_type = msg_type; + insn->bits3.sampler_gen5.simd_mode = simd_mode; + insn->bits3.sampler_gen5.header_present = header_present; + insn->bits3.sampler_gen5.response_length = response_length; + insn->bits3.sampler_gen5.msg_length = msg_length; + insn->bits3.sampler_gen5.end_of_thread = eot; + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } else if (brw->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p, if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) { if (p->nr_insn) - brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen); } assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); @@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p, { GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) { @@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) br = 2; if (p->single_program_flow) @@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 06826635a8a..2a8165b83ee 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); + c.need_ff_sync = brw->gen == 5; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = c.key.nr_attrs; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index b64ec286cea..6e070f6d756 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw, else gs.thread4.max_threads = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) gs.thread4.rendering_enable = 1; if (BRW_DEBUG & DEBUG_STATS) diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index c635d696617..d53ce6ccfd4 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw) static int emit_depthbuffer(struct brw_context *brw) { struct pipe_surface *surface = brw->curr.fb.zsbuf; - unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; + unsigned int len = (brw->is_g4x || brw->gen == 5) ? 6 : 5; if (surface == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -298,7 +298,7 @@ static int emit_depthbuffer(struct brw_context *brw) ((surface->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) OUT_BATCH(0); ADVANCE_BATCH(); @@ -374,7 +374,7 @@ static int upload_invariant_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) ps.header.opcode = CMD_PIPELINE_SELECT_GM45; else ps.header.opcode = CMD_PIPELINE_SELECT_965; @@ -413,7 +413,7 @@ static int upload_invariant_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (brw->is_g4x || brw->gen == 5) vfs.opcode = CMD_VF_STATISTICS_GM45; else vfs.opcode = CMD_VF_STATISTICS_965; @@ -424,7 +424,7 @@ static int upload_invariant_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &vfs); } - if (!BRW_IS_965(brw)) + if (!(brw->gen == 4)) { struct brw_aa_line_parameters balp; @@ -480,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { BEGIN_BATCH(8, IGNORE_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 4c1a6d7dcdf..c86681d1495 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw, { memset(key, 0, sizeof *key); - if (brw->chipset.is_igdng) + if (brw->gen == 5) key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key->clip_mode = BRW_CLIPMODE_NORMAL; diff --git a/src/gallium/drivers/i965/brw_pipe_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c index 4deead98b19..58a610089e2 100644 --- a/src/gallium/drivers/i965/brw_pipe_surface.c +++ b/src/gallium/drivers/i965/brw_pipe_surface.c @@ -169,20 +169,15 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, surface->ss.ss1.base_addr = surface->offset - tile_offset; - if (brw_screen->chipset.is_g4x) { - if (tex->tiling == BRW_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; - surface->ss.ss5.y_offset = tile_offset / 512 / 2; - } else { - surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; surface->ss.ss5.y_offset = tile_offset / 128 / 2; - } - } - else { - assert(tile_offset == 0); } } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index e1697687ccc..b23454b5808 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -203,7 +203,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw, brw_velems->ve[i].ve1.vfcomponent2 = comp2; brw_velems->ve[i].ve1.vfcomponent3 = comp3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) brw_velems->ve[i].ve1.dst_offset = 0; else brw_velems->ve[i].ve1.dst_offset = i * 4; diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h index ba10f9d5df1..53c7c435713 100644 --- a/src/gallium/drivers/i965/brw_reg.h +++ b/src/gallium/drivers/i965/brw_reg.h @@ -93,18 +93,54 @@ #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 -struct brw_chipset { - unsigned pci_id:16; - unsigned is_965:1; - unsigned is_igdng:1; - unsigned is_g4x:1; - unsigned pad:13; -}; - +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */ + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G || \ + devid == PCI_CHIP_B43_G1) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_IRONLAKE(devid) IS_GEN5(devid) + +#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_965(devid) (IS_GEN4(devid) || \ + IS_G4X(devid) || \ + IS_GEN5(devid) || \ + IS_GEN6(devid)) /* XXX: hacks */ diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index fded2da3820..0cb895f35de 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -392,7 +392,7 @@ brw_texture_create( struct pipe_screen *screen, if (tex->compressed == 0 && !bscreen->no_tiling) { - if (bscreen->chipset.is_965 && + if (bscreen->gen < 5 && util_format_is_depth_or_stencil(template->format)) tex->tiling = BRW_TILING_Y; else diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c index 2187bdd82ce..afecc77e312 100644 --- a/src/gallium/drivers/i965/brw_resource_texture_layout.c +++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c @@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen, { switch (tex->b.b.target) { case PIPE_TEXTURE_CUBE: - if (brw_screen->chipset.is_igdng) + if (brw_screen->gen == 5) brw_layout_cubemap_idgng( tex ); else brw_layout_3d_cube( tex ); diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index f5b75b17e36..bf805fd080c 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -97,7 +97,7 @@ brw_get_name(struct pipe_screen *screen) static char buffer[128]; const char *chipset; - switch (brw_screen(screen)->chipset.pci_id) { + switch (brw_screen(screen)->pci_id) { case PCI_CHIP_I965_G: chipset = "I965_G"; break; @@ -405,8 +405,6 @@ struct pipe_screen * brw_screen_create(struct brw_winsys_screen *sws) { struct brw_screen *bscreen; - struct brw_chipset chipset; - #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); @@ -415,46 +413,30 @@ brw_screen_create(struct brw_winsys_screen *sws) BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); #endif - memset(&chipset, 0, sizeof chipset); - - chipset.pci_id = sws->pci_id; - - switch (chipset.pci_id) { - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_Q: - case PCI_CHIP_I965_G_1: - case PCI_CHIP_I946_GZ: - case PCI_CHIP_I965_GM: - case PCI_CHIP_I965_GME: - chipset.is_965 = TRUE; - break; - - case PCI_CHIP_GM45_GM: - case PCI_CHIP_IGD_E_G: - case PCI_CHIP_Q45_G: - case PCI_CHIP_G45_G: - case PCI_CHIP_G41_G: - case PCI_CHIP_B43_G: - chipset.is_g4x = TRUE; - break; - - case PCI_CHIP_ILD_G: - case PCI_CHIP_ILM_G: - chipset.is_igdng = TRUE; - break; + bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) + return NULL; - default: + bscreen->pci_id = sws->pci_id; + if (IS_GEN6(sws->pci_id)) { + bscreen->gen = 6; + bscreen->needs_ff_sync = TRUE; + } else if (IS_GEN5(sws->pci_id)) { + bscreen->gen = 5; + bscreen->needs_ff_sync = TRUE; + } else if (IS_965(sws->pci_id)) { + bscreen->gen = 4; + if (IS_G4X(sws->pci_id)) { + bscreen->is_g4x = true; + } + } else { debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, chipset.pci_id); + __FUNCTION__, sws->pci_id); + free(bscreen); return NULL; } - - bscreen = CALLOC_STRUCT(brw_screen); - if (!bscreen) - return NULL; - - bscreen->chipset = chipset; + sws->gen = bscreen->gen; bscreen->sws = sws; bscreen->base.winsys = NULL; bscreen->base.destroy = brw_destroy_screen; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 58e293bc76f..a62e1afc405 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -43,7 +43,11 @@ struct brw_winsys_screen; struct brw_screen { struct pipe_screen base; - struct brw_chipset chipset; + int gen; + boolean has_negative_rhw_bug; + boolean needs_ff_sync; + boolean is_g4x; + int pci_id; struct brw_winsys_screen *sws; boolean no_tiling; }; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 497634ec9ed..901c3341642 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); @@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (p->brw->gen == 5) jmpi = 2; brw_push_insn_state(p); diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 6c299a86b49..eec024650ce 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw, sf.thread3.dispatch_grf_start_reg = 3; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; @@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw, /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or * 48(IGDNG) threads */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 48; else chipset_max_threads = 24; diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index e97ddeb5e1c..b0d75b4f828 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -279,7 +279,7 @@ struct brw_aa_line_parameters struct header header; struct { - GLuint aa_coverage_scope:8; + GLuint aa_coverage_slope:8; GLuint pad0:8; GLuint aa_coverage_bias:8; GLuint pad1:8; @@ -659,7 +659,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:26; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -814,6 +912,13 @@ struct brw_sf_unit_state }; +struct gen6_scissor_rect +{ + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; +}; struct brw_gs_unit_state { @@ -825,7 +930,7 @@ struct brw_gs_unit_state struct { GLuint pad0:8; - GLuint rendering_enable:1; /* for IGDNG */ + GLuint rendering_enable:1; /* for Ironlake */ GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; @@ -935,7 +1040,7 @@ struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; - /* for IGDNG only */ + /* for Ironlake only */ struct { GLuint pad0:1; GLuint grf_reg_count_1:3; @@ -962,6 +1067,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -973,7 +1087,7 @@ struct brw_sampler_state GLuint mag_filter:3; GLuint mip_filter:2; GLuint base_level:5; - GLuint pad:1; + GLuint min_mag_neq:1; GLuint lod_preclamp:1; GLuint default_color_mode:1; GLuint pad0:1; @@ -985,7 +1099,8 @@ struct brw_sampler_state GLuint r_wrap_mode:3; GLuint t_wrap_mode:3; GLuint s_wrap_mode:3; - GLuint pad:3; + GLuint cube_control_mode:1; + GLuint pad:2; GLuint max_lod:10; GLuint min_lod:10; } ss1; @@ -999,7 +1114,9 @@ struct brw_sampler_state struct brw_ss3 { - GLuint pad:19; + GLuint non_normalized_coord:1; + GLuint pad:12; + GLuint address_round:6; GLuint max_aniso:3; GLuint chroma_key_mode:1; GLuint chroma_key_index:2; @@ -1044,6 +1161,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... */ struct brw_surface_state @@ -1055,7 +1181,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1202,7 +1333,8 @@ struct brw_instruction GLuint predicate_inverse:1; GLuint execution_size:3; GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ - GLuint pad0:2; + GLuint acc_wr_control:1; + GLuint cmpt_control:1; GLuint debug_control:1; GLuint saturate:1; } header; @@ -1250,7 +1382,7 @@ struct brw_instruction GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } da16; @@ -1264,9 +1396,21 @@ struct brw_instruction GLuint dest_writemask:4; GLint dest_indirect_offset:6; GLuint dest_subreg_nr:3; - GLuint pad1:2; + GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } ia16; + + struct { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + + GLint jump_count:16; + } branch_gen6; } bits1; @@ -1339,7 +1483,7 @@ struct brw_instruction GLuint end_of_thread:1; GLuint pad1:1; GLuint sfid:4; - } send_igdng; /* for IGDNG only */ + } send_gen5; /* for Ironlake only */ } bits2; @@ -1413,6 +1557,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; @@ -1440,7 +1599,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } math_igdng; + } math_gen5; struct { GLuint binding_table_index:8; @@ -1476,7 +1635,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } sampler_igdng; + } sampler_gen5; struct brw_urb_immediate urb; @@ -1494,7 +1653,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } urb_igdng; + } urb_gen5; struct { GLuint binding_table_index:8; @@ -1510,6 +1669,18 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read_g4x; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint msg_type:3; GLuint target_cache:2; @@ -1519,7 +1690,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_read_igdng; + } dp_read_gen5; struct { GLuint binding_table_index:8; @@ -1546,10 +1717,38 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_write_igdng; + } dp_write_gen5; + + /* Sandybridge DP for sample cache, constant cache, render cache */ + struct { + GLuint binding_table_index:8; + GLuint msg_control:5; + GLuint msg_type:3; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_sampler_const_cache; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint slot_group_select:1; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:4; + GLuint send_commit_msg:1; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_render_cache; struct { - GLuint pad:16; + GLuint function_control:16; GLuint response_length:4; GLuint msg_length:4; GLuint msg_target:4; @@ -1557,14 +1756,15 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + /* Of this struct, only end_of_thread is not present for gen6. */ struct { - GLuint pad:19; + GLuint function_control:19; GLuint header_present:1; GLuint response_length:5; GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } generic_igdng; + } generic_gen5; GLint d; GLuint ud; diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c index cd40fc6d618..f3de2f995b3 100644 --- a/src/gallium/drivers/i965/brw_structs_dump.c +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); + debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope); debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 907ec56c6ca..b630752809e 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.constrained = 0; - if (BRW_IS_IGDNG(brw)) { + if (brw->gen == 5) { brw->urb.nr_vs_entries = 128; brw->urb.nr_sf_entries = 48; if (check_urb_layout(brw)) { @@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; } - } else if (BRW_IS_G4X(brw)) { + } else if (brw->is_g4x) { brw->urb.nr_vs_entries = 64; if (check_urb_layout(brw)) { goto done; diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 944d88c84cc..b6d1091618e 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -56,7 +56,6 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_chipset chipset; struct brw_vertex_shader *vp; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 5dcbd597ddc..559f0c61d8d 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct brw_context *brw = c->func.brw; GLuint i, reg = 0, subreg = 0, mrf; int attributes_in_vue; @@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->nr_outputs = c->prog_data.nr_outputs; - if (c->chipset.is_igdng) + if (brw->gen == 5) mrf = 8; else mrf = 4; @@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (c->chipset.is_igdng) + if (brw->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; @@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ if (c->prog_data.writes_psiz || c->key.nr_userclip || - c->chipset.is_965) + brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (c->chipset.is_965) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - if (c->chipset.is_igdng) { + if (brw->gen == 5) { /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ /* m4, m5 contain the distances from vertex to the user clip planeXXX. @@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c, unsigned opcode = inst->Instruction.Opcode; unsigned label = inst->Label.Label; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_reg args[3], dst; GLuint i; @@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c, c->loop_depth--; - if (c->chipset.is_igdng) + if (brw->gen == 5) br = 2; inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); @@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c) if (BRW_DEBUG & DEBUG_VS) { debug_printf("vs-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index dadbb622e4d..6d2ccfd6d98 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw, */ vs.thread1.single_program_flow = 0; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; @@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw, vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; else vs.thread4.nr_urb_entries = key->nr_urb_entries; vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) chipset_max_threads = 72; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) chipset_max_threads = 32; else chipset_max_threads = 16; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index a06f8bb7d61..038f6f788a0 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -148,7 +148,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc, struct brw_winsys_screen { unsigned pci_id; - + int gen; /** * Buffer functions. */ @@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ); + size_t size, int gen ); #endif diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c index f8f6a539bc9..b66b1cfccb6 100644 --- a/src/gallium/drivers/i965/brw_winsys_debug.c +++ b/src/gallium/drivers/i965/brw_winsys_debug.c @@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id, enum brw_buffer_data_type data_type, unsigned offset, const void *data, - size_t size ) + size_t size, int gen ) { if (BRW_DUMP & DUMP_ASM) { switch (data_type) { @@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id, case BRW_DATA_GS_VS_PROG: case BRW_DATA_GS_GS_PROG: case BRW_DATA_GS_CLIP_PROG: - brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); + brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen ); break; default: break; @@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id, if (BRW_DUMP & DUMP_BATCH) { switch (data_type) { case BRW_DATA_BATCH_BUFFER: - intel_decode(data, size / 4, offset, pci_id); + intel_decode(data, size / 4, offset, pci_id, 0); break; default: break; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 8f983a60ae8..6301062fd79 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; } else { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; @@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(8), coord[3]); msgLength = 9; - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + if (p->brw->gen == 5) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; @@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (BRW_DEBUG & DEBUG_WM) { debug_printf("wm-native:\n"); - brw_disasm(stderr, p->store, p->nr_insn); + brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 3b3afc39d3c..fb8e40d928e 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; } else { /* Does it work well on SIMD8? */ @@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } - if (BRW_IS_IGDNG(p->brw)) { + if (p->brw->gen == 5) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; else @@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ struct brw_instruction *inst0, *inst1; GLuint br = 1; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) br = 2; loop_depth--; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index efc2d96be13..a690003ecbd 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) key->max_threads = 12 * 6; - else if (BRW_IS_G4X(brw)) + else if (brw->is_g4x) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm.thread1.binding_table_entry_count = key->nr_surfaces; @@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (brw->gen == 5) wm.wm4.sampler_count = 0; /* hardware requirement */ else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; @@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw ) grf_reg_count = (align(key.total_grf, 16) / 16 - 1); per_thread_scratch_space = key.total_scratch / 1024 - 1; stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; - sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4; + sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4; /* Emit WM program relocation */ make_reloc(&reloc[nr_reloc++], diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 36c04a31655..1abe869f1ae 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -42,10 +42,11 @@ #include "util/u_memory.h" #include "util/u_string.h" + #include "intel_decode.h" +#include "brw_reg.h" /*#include "intel_chipset.h"*/ -#define IS_965(x) 1 /* XXX */ #define IS_9XX(x) 1 /* XXX */ #define BUFFER_FAIL(_count, _len, _name) do { \ @@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) } opcodes_mi[] = { { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, { 0x04, 0, 1, 1, "MI_FLUSH" }, - { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, { 0x00, 0, 1, 1, "MI_NOOP" }, @@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, }; + switch ((data[0] & 0x1f800000) >> 23) { + case 0x0a: + instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n"); + return -1; + } for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) { if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { @@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) static int decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { - switch ((data[0] & 0x00f80000) >> 19) { + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { case 0x11: - instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); return 1; case 0x10: instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); @@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) return 1; } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); (*failures)++; return 1; } @@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask sprintf(dstname, "oD%s%s", dstmask, sat); break; case 6: - if (dst_nr > 2) + if (dst_nr > 3) fprintf(out, "bad destination reg U%d\n", dst_nr); sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); break; @@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) break; case 6: sprintf(name, "U%d", src_nr); - if (src_nr > 2) + if (src_nr > 3) fprintf(out, "bad src reg %s\n", name); break; default: @@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset, } static int -decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +decode_3d_1d(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + int *failures) { - unsigned int len, i, c, opcode, word, map, sampler, instr; + unsigned int len, i, c, idx, word, map, sampler, instr; char *format; + uint32_t opcode; struct { uint32_t opcode; @@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } opcodes_3d_1d[] = { { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, - { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" }, { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, @@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, - { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" }, { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, @@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" }, - }; + }, *opcode_3d_1d; - switch ((data[0] & 0x00ff0000) >> 16) { + opcode = (data[0] & 0x00ff0000) >> 16; + + switch (opcode) { case 0x07: /* This instruction is unusual. A 0 length means just 1 DWORD instead of * 2. The 0 length is specified in one place to be unsupported, but @@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); len = (data[0] & 0x0000000f) + 2; i = 1; - for (word = 0; word <= 7; word++) { + for (word = 0; word <= 8; word++) { if (data[0] & (1 << (4 + word))) { if (i >= count) BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); /* save vertex state for decode */ - if (word == 2) { - saved_s2_set = 1; - saved_s2 = data[i]; - } - if (word == 4) { - saved_s4_set = 1; - saved_s4 = data[i]; + if (IS_9XX(devid)) { + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } } instr_out(data, hw_offset, i++, "S%d\n", word); } } if (len != i) { - fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + (*failures)++; + } + return len; + case 0x03: + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2"); + + if (word == 6) + instr_out(data, hw_offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + instr_out(data, hw_offset, i++, "TB%dC\n", word - 7); + instr_out(data, hw_offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11); + instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11); + } + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); (*failures)++; } return len; @@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, i = 2; for (map = 0; map <= 15; map++) { if (data[1] & (1 << map)) { + int width, height, pitch, dword; + const char *tiling; + if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); instr_out(data, hw_offset, i++, "map %d MS2\n", map); - instr_out(data, hw_offset, i++, "map %d MS3\n", map); - instr_out(data, hw_offset, i++, "map %d MS4\n", map); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? "Y" : "X"; + instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch); } } if (len != i) { @@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } return len; case 0x01: - if (i830) - break; + if (!IS_9XX(devid)) + break; instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); instr_out(data, hw_offset, 1, "mask\n"); len = (data[0] & 0x0000003f) + 2; @@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, format, (data[1] & (1 << 31)) ? "en" : "dis"); return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n"); + if (count < 3) + BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO"); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? "Y" : "X"; + + instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n"); + instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + instr_out(data, hw_offset, 2, "address\n"); + return len; + } } - for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) { - if (opcodes_3d_1d[opcode].i830_only && !i830) + for (idx = 0; idx < Elements(opcodes_3d_1d); idx++) + { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (opcode_3d_1d->i830_only && IS_9XX(devid)) continue; - if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); - if (opcodes_3d_1d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name); + if (opcode_3d_1d->max_len > 1) { len = (data[0] & 0x0000ffff) + 2; - if (len < opcodes_3d_1d[opcode].min_len || - len > opcodes_3d_1d[opcode].max_len) + if (len < opcode_3d_1d->min_len || + len > opcode_3d_1d->max_len) { fprintf(out, "Bad count in %s\n", - opcodes_3d_1d[opcode].name); + opcode_3d_1d->name); (*failures)++; } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d_1d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } @@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { char immediate = (data[0] & (1 << 23)) == 0; - unsigned int len, i; + unsigned int len, i, ret; char *primtype; + int original_s2 = saved_s2; + int original_s4 = saved_s4; switch ((data[0] >> 18) & 0xf) { case 0x0: primtype = "TRILIST"; break; @@ -1089,7 +1185,7 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, case 0x7: primtype = "RECTLIST"; break; case 0x8: primtype = "POINTLIST"; break; case 0x9: primtype = "DIB"; break; - case 0xa: primtype = "CLEAR_RECT"; break; + case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break; default: primtype = "unknown"; break; } @@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, vertex++; } } + + ret = len; } else { /* indirect vertices */ len = data[0] & 0x0000ffff; /* index count */ @@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, if ((data[i] & 0xffff) == 0xffff) { instr_out(data, hw_offset, i, " indices: (terminator)\n"); - return i; + ret = i; + goto out; } else if ((data[i] >> 16) == 0xffff) { instr_out(data, hw_offset, i, " indices: 0x%04x, " "(terminator)\n", data[i] & 0xffff); - return i; + ret = i; + goto out; } else { instr_out(data, hw_offset, i, " indices: 0x%04x, 0x%04x\n", @@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, fprintf(out, "3DPRIMITIVE: no terminator found in index buffer\n"); (*failures)++; - return count; + ret = count; + goto out; } else { /* fixed size vertex index buffer */ for (i = 0; i < len; i += 2) { @@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, } } } - return (len + 1) / 2 + 1; + ret = (len + 1) / 2 + 1; + goto out; } else { /* sequential vertex access */ if (count < 2) @@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, "3DPRIMITIVE sequential indirect %s, %d starting from " "%d\n", primtype, len, data[1] & 0xffff); instr_out(data, hw_offset, 1, " start\n"); - return 2; + ret = 2; + goto out; } } - return len; +out: + saved_s2 = original_s2; + saved_s4 = original_s4; + return ret; } static int -decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + uint32_t opcode; + unsigned int idx; struct { uint32_t opcode; @@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x0d, 1, 1, "3DSTATE_MODES_4" }, { 0x0c, 1, 1, "3DSTATE_MODES_5" }, { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, - }; + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 0); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data) default: return "fail"; } } +static int +i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count, + int *failures) +{ + uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence; + + if (len != 3) + fprintf(out, "Bad count in URB_FENCE\n"); + if (count < 3) + BUFFER_FAIL(count, len, "URB_FENCE"); + + vs_fence = data[1] & 0x3ff; + gs_fence = (data[1] >> 10) & 0x3ff; + clip_fence = (data[1] >> 20) & 0x3ff; + sf_fence = data[2] & 0x3ff; + vfe_fence = (data[2] >> 10) & 0x3ff; + cs_fence = (data[2] >> 20) & 0x7ff; + + instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + instr_out(data, hw_offset, 1, + "vs fence: %d, clip_fence: %d, gs_fence: %d\n", + vs_fence, clip_fence, gs_fence); + instr_out(data, hw_offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + sf_fence, vfe_fence, cs_fence); + if (gs_fence < vs_fence) + fprintf(out, "gs fence < vs fence!\n"); + if (clip_fence < gs_fence) + fprintf(out, "clip fence < gs fence!\n"); + if (sf_fence < clip_fence) + fprintf(out, "sf fence < clip fence!\n"); + if (cs_fence < sf_fence) + fprintf(out, "cs fence < sf fence!\n"); + + return len; +} + +static void +state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + instr_out(data, hw_offset, index, "%s state base address 0x%08x\n", + name, data[index] & ~1); + } else { + instr_out(data, hw_offset, index, "%s state base not updated\n", + name); + } +} + +static void +state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, + char *name) +{ + if (data[index] & 1) { + if (data[index] == 1) { + instr_out(data, hw_offset, index, + "%s state upper bound disabled\n", name); + } else { + instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + } + } else { + instr_out(data, hw_offset, index, "%s state upper bound not updated\n", + name); + } +} static int -decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode, len; - int i; + uint32_t opcode; + unsigned int idx, len; + int i, sba_len; + char *desc1 = NULL; struct { uint32_t opcode; @@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, - }; + { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, + { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7813, 20, 20, "3DSTATE_SF_STATE" }, + { 0x7814, 9, 9, "3DSTATE_WM_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }, *opcode_3d; len = (data[0] & 0x0000ffff) + 2; - switch ((data[0] & 0xffff0000) >> 16) { + opcode = (data[0] & 0xffff0000) >> 16; + switch (opcode) { + case 0x6000: + len = (data[0] & 0x000000ff) + 2; + return i965_decode_urb_fence(data, hw_offset, len, count, failures); + case 0x6001: + instr_out(data, hw_offset, 0, "CS_URB_STATE\n"); + instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + len = (data[0] & 0x000000ff) + 2; + instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? "valid" : "invalid"); + instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; case 0x6101: - if (len != 6) + if (IS_GEN6(devid)) + sba_len = 10; + else if (IS_IRONLAKE(devid)) + sba_len = 8; + else + sba_len = 6; + if (len != sba_len) fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); - if (count < 6) + if (len != sba_len) BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + i = 0; instr_out(data, hw_offset, 0, "STATE_BASE_ADDRESS\n"); - - if (data[1] & 1) { - instr_out(data, hw_offset, 1, "General state at 0x%08x\n", - data[1] & ~1); - } else - instr_out(data, hw_offset, 1, "General state not updated\n"); - - if (data[2] & 1) { - instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", - data[2] & ~1); - } else - instr_out(data, hw_offset, 2, "Surface state not updated\n"); - - if (data[3] & 1) { - instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", - data[3] & ~1); - } else - instr_out(data, hw_offset, 3, "Indirect state not updated\n"); - - if (data[4] & 1) { - instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", - data[4] & ~1); - } else - instr_out(data, hw_offset, 4, "General state not updated\n"); - - if (data[5] & 1) { - instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", - data[5] & ~1); - } else - instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + i++; + + state_base_out(data, hw_offset, i++, "general"); + state_base_out(data, hw_offset, i++, "surface"); + if (IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "dynamic"); + state_base_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_base_out(data, hw_offset, i++, "instruction"); + + state_max_out(data, hw_offset, i++, "general"); + if (IS_GEN6(devid)) + state_max_out(data, hw_offset, i++, "dynamic"); + state_max_out(data, hw_offset, i++, "indirect"); + if (IS_IRONLAKE(devid) || IS_GEN6(devid)) + state_max_out(data, hw_offset, i++, "instruction"); return len; case 0x7800: @@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures instr_out(data, hw_offset, 6, "CC state\n"); return len; case 0x7801: - if (len != 6) + len = (data[0] & 0x000000ff) + 2; + if (len != 6 && len != 4) fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); - if (count < 6) - BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + if (len == 6) { + if (count < 6) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + } else { + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); - instr_out(data, hw_offset, 0, - "3DSTATE_BINDING_TABLE_POINTERS\n"); - instr_out(data, hw_offset, 1, "VS binding table\n"); - instr_out(data, hw_offset, 2, "GS binding table\n"); - instr_out(data, hw_offset, 3, "Clip binding table\n"); - instr_out(data, hw_offset, 4, "SF binding table\n"); - instr_out(data, hw_offset, 5, "WM binding table\n"); + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, PS mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 10)) != 0); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "WM binding table\n"); + } return len; @@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures } return len; + case 0x780d: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + instr_out(data, hw_offset, 1, "clip\n"); + instr_out(data, hw_offset, 2, "sf\n"); + instr_out(data, hw_offset, 3, "cc\n"); + return len; + case 0x780a: len = (data[0] & 0xff) + 2; if (len != 3) @@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; case 0x7905: - if (len != 5 && len != 6) + if (len < 5 || len > 7) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); @@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures ((data[3] & 0x0007ffc0) >> 6) + 1, ((data[3] & 0xfff80000) >> 19) + 1); instr_out(data, hw_offset, 4, "volume depth\n"); - if (len == 6) + if (len >= 6) instr_out(data, hw_offset, 5, "\n"); + if (len >= 7) + instr_out(data, hw_offset, 6, "render target view extent\n"); + + return len; + case 0x7a00: + len = (data[0] & 0xff) + 2; + if (len != 4) + fprintf(out, "Bad count in PIPE_CONTROL\n"); + if (count < len) + BUFFER_FAIL(count, len, "PIPE_CONTROL"); + + switch ((data[0] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + instr_out(data, hw_offset, 0, + "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, " + "%sinst flush\n", + desc1, + data[0] & (1 << 13) ? "" : "no ", + data[0] & (1 << 12) ? "" : "no ", + data[0] & (1 << 11) ? "" : "no "); + instr_out(data, hw_offset, 1, "destination address\n"); + instr_out(data, hw_offset, 2, "immediate dword low\n"); + instr_out(data, hw_offset, 3, "immediate dword high\n"); return len; case 0x7b00: @@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures return len; } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) { unsigned int i; len = 1; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode); (*failures)++; return 1; } static int -decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures) { - unsigned int opcode; + unsigned int idx; + uint32_t opcode; struct { uint32_t opcode; @@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure { 0x0f, 1, 1, "3DSTATE_MODES_2" }, { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, { 0x16, 1, 1, "3DSTATE_MODES_4" }, - }; + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; - switch ((data[0] & 0x1f000000) >> 24) { + switch (opcode) { case 0x1f: return decode_3d_primitive(data, count, hw_offset, failures); case 0x1d: - return decode_3d_1d(data, count, hw_offset, failures, 1); + return decode_3d_1d(data, count, hw_offset, devid, failures); case 0x1c: return decode_3d_1c(data, count, hw_offset, failures); } - for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) { - if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + for (idx = 0; idx < Elements(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) { unsigned int len = 1, i; - instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); - if (opcodes_3d[opcode].max_len > 1) { + instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { len = (data[0] & 0xff) + 2; - if (len < opcodes_3d[opcode].min_len || - len > opcodes_3d[opcode].max_len) + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { - fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + fprintf(out, "Bad count in %s\n", opcode_3d->name); } } for (i = 1; i < len; i++) { if (i >= count) - BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + BUFFER_FAIL(count, len, opcode_3d->name); instr_out(data, hw_offset, i, "dword %d\n", i); } return len; } } - instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode); (*failures)++; return 1; } @@ -1736,8 +2007,12 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure * \param hw_offset hardware address for the buffer */ int -intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +intel_decode(const uint32_t *data, int count, + uint32_t hw_offset, + uint32_t devid, + uint32_t ignore_end_of_batchbuffer) { + int ret; int index = 0; int failures = 0; @@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid while (index < count) { switch ((data[index] & 0xe0000000) >> 29) { case 0x0: - index += decode_mi(data + index, count - index, + ret = decode_mi(data + index, count - index, hw_offset + index * 4, &failures); + + /* If MI_BATCHBUFFER_END happened, then dump the rest of the + * output in case we some day want it in debugging, but don't + * decode it since it'll just confuse in the common case. + */ + if (ret == -1) { + if (ignore_end_of_batchbuffer) { + index++; + } else { + for (index = index + 1; index < count; index++) { + instr_out(data, hw_offset, index, "\n"); + } + } + } else + index += ret; break; case 0x2: index += decode_2d(data + index, count - index, @@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid case 0x3: if (IS_965(devid)) { index += decode_3d_965(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else if (IS_9XX(devid)) { index += decode_3d(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } else { index += decode_3d_i830(data + index, count - index, - hw_offset + index * 4, &failures); + hw_offset + index * 4, + devid, &failures); } break; default: diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h index 6201a23d6a4..7e7c108c0c6 100644 --- a/src/gallium/drivers/i965/intel_decode.h +++ b/src/gallium/drivers/i965/intel_decode.h @@ -27,5 +27,5 @@ #include "pipe/p_compiler.h" -int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer); void intel_decode_context_reset(void); diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index c9c463f470f..8c9efc2f536 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -30,10 +30,16 @@ #include <util/u_inlines.h> #include <util/u_format.h> #include "noop_public.h" -#include "state_tracker/sw_winsys.h" + +DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE) void noop_init_state_functions(struct pipe_context *ctx); +struct noop_pipe_screen { + struct pipe_screen pscreen; + struct pipe_screen *oscreen; +}; + /* * query */ @@ -108,52 +114,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, FREE(nresource); return NULL; } -#if 0 - if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - unsigned stride; - - nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind, - nresource->base.format, - nresource->base.width0, - nresource->base.height0, - 16, &stride); - } -#endif return &nresource->base; } -static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen, +static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, - struct winsys_handle *whandle) + struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource; - struct sw_displaytarget *dt; - unsigned stride; + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + struct pipe_resource *result; + struct noop_resource *noop_resource; - dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride); - if (dt == NULL) { - return NULL; - } - nresource = (struct noop_resource *)noop_resource_create(screen, templ); - nresource->dt = dt; - return &nresource->base; + result = oscreen->resource_from_handle(oscreen, templ, handle); + noop_resource = noop_resource_create(screen, result); + pipe_resource_reference(&result, NULL); + return noop_resource; } static boolean noop_resource_get_handle(struct pipe_screen *screen, struct pipe_resource *resource, struct winsys_handle *handle) { - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - struct noop_resource *nresource = (struct noop_resource *)resource; - - if (nresource->dt == NULL) - return FALSE; - - return winsys->displaytarget_get_handle(winsys, nresource->dt, handle); + return FALSE; } static void noop_resource_destroy(struct pipe_screen *screen, @@ -161,11 +144,6 @@ static void noop_resource_destroy(struct pipe_screen *screen, { struct noop_resource *nresource = (struct noop_resource *)resource; - if (nresource->dt) { - /* display target */ - struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys; - winsys->displaytarget_destroy(winsys, nresource->dt); - } free(nresource->data); FREE(resource); } @@ -483,19 +461,30 @@ static boolean noop_is_format_supported(struct pipe_screen* screen, static void noop_destroy_screen(struct pipe_screen *screen) { + struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen; + struct pipe_screen *oscreen = noop_screen->oscreen; + + oscreen->destroy(oscreen); FREE(screen); } -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys) +struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen) { + struct noop_pipe_screen *noop_screen; struct pipe_screen *screen; - screen = CALLOC_STRUCT(pipe_screen); - if (screen == NULL) { + if (!debug_get_option_noop()) { + return oscreen; + } + + noop_screen = CALLOC_STRUCT(noop_pipe_screen); + if (noop_screen == NULL) { return NULL; } + noop_screen->oscreen = oscreen; + screen = &noop_screen->pscreen; - screen->winsys = (struct pipe_winsys*)winsys; + screen->winsys = oscreen->winsys; screen->destroy = noop_destroy_screen; screen->get_name = noop_get_name; screen->get_vendor = noop_get_vendor; diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h index 8ce82bec698..180ea597fab 100644 --- a/src/gallium/drivers/noop/noop_public.h +++ b/src/gallium/drivers/noop/noop_public.h @@ -23,8 +23,7 @@ #ifndef NOOP_PUBLIC_H #define NOOP_PUBLIC_H -struct sw_winsys; - -struct pipe_screen *noop_screen_create(struct sw_winsys *winsys); +struct pipe_screen; +struct pipe_screen *noop_screen_create(struct pipe_screen *screen); #endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index d97566ed7cf..b4eda0f617d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; - } else - if (shader == PIPE_SHADER_GEOMETRY) { - nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + } else { + assert(shader == PIPE_SHADER_GEOMETRY); nv50->dirty |= NV50_NEW_GEOMPROG_CB; } + + pipe_resource_reference(&nv50->constbuf[shader], buf); } static void diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile index 5c3d46d9eab..da8f9a2ab4d 100644 --- a/src/gallium/drivers/nvc0/Makefile +++ b/src/gallium/drivers/nvc0/Makefile @@ -28,6 +28,7 @@ C_SOURCES = \ nvc0_push.c \ nvc0_push2.c \ nvc0_fence.c \ - nvc0_mm.c + nvc0_mm.c \ + nvc0_query.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript index 46c0b5889a0..c49e0ddbc52 100644 --- a/src/gallium/drivers/nvc0/SConscript +++ b/src/gallium/drivers/nvc0/SConscript @@ -29,7 +29,8 @@ nvc0 = env.ConvenienceLibrary( 'nvc0_push.c', 'nvc0_push2.c', 'nvc0_fence.c', - 'nvc0_mm.c' + 'nvc0_mm.c', + 'nvc0_query.c' ]) Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 1346d999409..702e58b2e3c 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -8,15 +8,15 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- nvc0_3d.xml ( 28058 bytes, from 2010-11-26 18:05:20) +- nvc0_3d.xml ( 30401 bytes, from 2011-01-08 18:09:11) - copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) - nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) -- nv_3ddefs.xml ( 16394 bytes, from 2010-10-09 08:27:14) -- nv_object.xml ( 11547 bytes, from 2010-11-26 16:41:56) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) - nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) -- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) -Copyright (C) 2006-2010 by the following authors: +Copyright (C) 2006-2011 by the following authors: - Artur Huillet <[email protected]> (ahuillet) - Ben Skeggs (darktama, darktama_) - B. R. <[email protected]> (koala_br) @@ -248,6 +248,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 #define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 +#define NVC0_3D_COUNTER_ENABLE 0x00000d68 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 +#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 + #define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 #define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 @@ -498,14 +516,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_COLOR__LEN 0x00000004 #define NVC0_3D_TSC_FLUSH 0x00001330 -#define NVC0_3D_TSC_FLUSH_UNK0 0x00000001 -#define NVC0_3D_TSC_FLUSH_UNK1__MASK 0x03fffff0 -#define NVC0_3D_TSC_FLUSH_UNK1__SHIFT 4 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 #define NVC0_3D_TIC_FLUSH 0x00001334 -#define NVC0_3D_TIC_FLUSH_UNK0 0x00000001 -#define NVC0_3D_TIC_FLUSH_UNK1__MASK 0x03fffff0 -#define NVC0_3D_TIC_FLUSH_UNK1__SHIFT 4 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 #define NVC0_3D_TEX_CACHE_CTL 0x00001338 #define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 @@ -630,7 +648,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 -#define NVC0_3D_SAMPLECNT_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 +#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f #define NVC0_3D_MULTISAMPLE_ZETA_ENABLE 0x00001534 @@ -960,11 +996,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_QUERY_SEQUENCE 0x00001b08 #define NVC0_3D_QUERY_GET 0x00001b0c -#define NVC0_3D_QUERY_GET_FENCE 0x1000f010 -#define NVC0_3D_QUERY_GET_SAMPLE_COUNT 0x0100f002 -#define NVC0_3D_QUERY_GET_TFB 0x05805002 -#define NVC0_3D_QUERY_GET_GENERATED_PRIMS 0x06805002 -#define NVC0_3D_QUERY_GET_UNK00005002 0x00005002 +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NVC0_3D_QUERY_GET_INTR 0x00100000 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 +#define NVC0_3D_QUERY_GET_SHORT 0x10000000 #define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) #define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 @@ -1026,6 +1081,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 #define NVC0_3D_TEX_LIMITS__LEN 0x00000005 +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_3D_FIRMWARE__LEN 0x00000020 + #define NVC0_3D_CB_SIZE 0x00002380 #define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index 06841bb19b8..f5ac6557fe9 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -52,8 +52,7 @@ static INLINE void release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) { if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) { - (*mm)->next = fence->buffers; - fence->buffers = (*mm); + nvc0_fence_sched_release(fence, *mm); } else { nvc0_mm_free(*mm); } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index b2b4fd62eeb..2118abb5d5d 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -92,6 +92,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) screen->base.channel->user_private = nvc0; + nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); nvc0_init_state_functions(nvc0); nvc0_init_resource_functions(&nvc0->pipe); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index c181f15dbd8..0f340beb35a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -166,6 +166,9 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); boolean nvc0_program_translate(struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); +/* nvc0_query.c */ +void nvc0_init_query_functions(struct nvc0_context *); + /* nvc0_shader_state.c */ void nvc0_vertprog_validate(struct nvc0_context *); void nvc0_tctlprog_validate(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 7c214ca9a75..9d2c48cf14d 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -59,7 +59,8 @@ nvc0_fence_emit(struct nvc0_fence *fence) OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RING (chan, fence->sequence); - OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE); + OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); ++fence->ref; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index ccbb776447f..941be678586 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -75,13 +75,15 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->chan->cur); ctx->chan->cur += size; - count -= push; - elts += push; + count -= nr; + elts += nr; if (nr != push) { + count--; + elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); OUT_RING (ctx->chan, ctx->prim); @@ -106,13 +108,15 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->chan->cur); ctx->chan->cur += size; - count -= push; - elts += push; + count -= nr; + elts += nr; if (nr != push) { + count--; + elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); OUT_RING (ctx->chan, ctx->prim); @@ -137,13 +141,15 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); - ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->chan->cur); ctx->chan->cur += size; - count -= push; - elts += push; + count -= nr; + elts += nr; if (nr != push) { + count--; + elts++; BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2); OUT_RING (ctx->chan, 0); OUT_RING (ctx->chan, ctx->prim); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c new file mode 100644 index 00000000000..cc83fbe771c --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -0,0 +1,337 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#include "nvc0_context.h" +#include "nouveau/nv_object.xml.h" + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. + */ + +struct nvc0_query { + uint32_t *data; + uint32_t type; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * 16 */ + boolean ready; + boolean is64bit; + struct nvc0_mm_allocation *mm; +}; + +#define NVC0_QUERY_ALLOC_SPACE 128 + +static INLINE struct nvc0_query * +nvc0_query(struct pipe_query *pipe) +{ + return (struct nvc0_query *)pipe; +} + +static boolean +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->mm) { + if (q->ready) + nvc0_mm_free(q->mm); + else + nvc0_fence_sched_release(screen->fence.current, q->mm); + } + } + if (size) { + q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = q->base; + + ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD | + NOUVEAU_BO_NOSYNC); + if (ret) { + nvc0_query_allocate(nvc0, q, 0); + return FALSE; + } + q->data = q->bo->map; + nouveau_bo_unmap(q->bo); + } + return TRUE; +} + +static void +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); + FREE(nvc0_query(pq)); +} + +static struct pipe_query * +nvc0_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q; + + q = CALLOC_STRUCT(nvc0_query); + if (!q) + return NULL; + + if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) { + FREE(q); + return NULL; + } + + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS); + q->type = type; + + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset -= 16; + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + } + + return (struct pipe_query *)q; +} + +static void +nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, q->sequence); + OUT_RING (chan, get); +} + +static void +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render conition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset += 16; + q->data += 16 / sizeof(*q->data); + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[1] = 1; /* initial render condition = TRUE */ + } + if (!q->is64bit) + q->data[0] = q->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT); + IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */ + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + IMMED_RING(chan, RING_3D(COUNTER_RESET), + NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + break; + case PIPE_QUERY_SO_STATISTICS: + BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES); + OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0x10, 0x00005002); + break; + default: + break; + } + q->ready = FALSE; +} + +static void +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q = nvc0_query(pq); + + const int index = 0; /* for multiple vertex streams */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nvc0_query_get(chan, q, 0, 0x0100f002); + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); + OUT_RING (chan, 0); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5)); + nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(chan, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nvc0_query_get(chan, q, 0, 0x1000f010); + break; + default: + assert(0); + break; + } +} + +static INLINE boolean +nvc0_query_ready(struct nvc0_query *q) +{ + return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); +} + +static INLINE boolean +nvc0_query_wait(struct nvc0_query *q) +{ + int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); + if (ret) + return FALSE; + nouveau_bo_unmap(q->bo); + return TRUE; +} + +static boolean +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, void *result) +{ + struct nvc0_query *q = nvc0_query(pq); + uint64_t *res64 = result; + uint32_t *res32 = result; + boolean *res8 = result; + uint64_t *data64 = (uint64_t *)q->data; + + if (q->type == PIPE_QUERY_GPU_FINISHED) { + res8[0] = nvc0_query_ready(q); + return TRUE; + } + + if (!q->ready) /* update ? */ + q->ready = nvc0_query_ready(q); + if (!q->ready) { + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; + if (!wait) { + if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */ + FIRE_RING(chan); + return FALSE; + } + if (!nvc0_query_wait(q)) + return FALSE; + } + q->ready = TRUE; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res32[0] = q->data[1]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0]; + res64[1] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ + res64[0] = 1000000000; + res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + default: + return FALSE; + } + + return TRUE; +} + +static void +nvc0_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, uint mode) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_query *q; + + if (!pq) { + IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + return; + } + q = nvc0_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, q->sequence); + OUT_RING (chan, 0x00001001); + } + + BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO); +} + +void +nvc0_init_query_functions(struct nvc0_context *nvc0) +{ + nvc0->pipe.create_query = nvc0_query_create; + nvc0->pipe.destroy_query = nvc0_query_destroy; + nvc0->pipe.begin_query = nvc0_query_begin; + nvc0->pipe.end_query = nvc0_query_end; + nvc0->pipe.get_query_result = nvc0_query_result; + nvc0->pipe.render_condition = nvc0_render_condition; +} diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index e149b907314..c191790ab14 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -92,9 +92,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; + case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_OCCLUSION_QUERY: return 1; - case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_STREAM_OUTPUT: return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 5c6482be96b..1fac142e2be 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -86,6 +86,13 @@ struct nvc0_mm_allocation { uint32_t offset; }; +static INLINE void +nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm) +{ + mm->next = nf->buffers; + nf->buffers = mm; +} + extern struct nvc0_mman * nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 881f0655506..fd7a7942cb8 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -584,6 +584,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (info->primitive_restart) { BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1); OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; } nvc0_draw_elements(nvc0, shorten, diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 54619037d82..f3dcb205c61 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, { struct nvfx_context *nvfx = nvfx_context(pipe); - nvfx->constbuf[shader] = buf; + pipe_resource_reference(&nvfx->constbuf[shader], buf); nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 017db48485b..a43e83c0d36 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -179,6 +179,12 @@ static void r300_clear(struct pipe_context* pipe, boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { @@ -274,6 +280,12 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); @@ -291,6 +303,12 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + /* Decompress zbuffers that are bound as textures. If we didn't flush here, + * it would happen inside the blitter when updating derived state, + * causing a blitter operation to be called from inside the blitter, + * which would overwrite saved states and they would never get restored. */ + r300_flush_depth_textures(r300); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); @@ -298,10 +316,10 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, } /* Flush a depth stencil buffer. */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned level, + unsigned layer) { struct r300_context *r300 = r300_context(pipe); struct pipe_surface *dstsurf, surf_tmpl; @@ -320,6 +338,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); @@ -329,6 +348,39 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, pipe_surface_reference(&dstsurf, NULL); } +/* We can't use compressed zbuffers as samplers. */ +void r300_flush_depth_textures(struct r300_context *r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i, level; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + + if (r300->z_decomp_rd) + return; + + for (i = 0; i < count; i++) + if (state->sampler_views[i] && state->sampler_states[i]) { + struct pipe_resource *tex = state->sampler_views[i]->base.texture; + + if (tex->target == PIPE_TEXTURE_3D || + tex->target == PIPE_TEXTURE_CUBE) + continue; + + /* Ignore non-depth textures. + * Also ignore reinterpreted depth textures, e.g. resource_copy. */ + if (!util_format_is_depth_or_stencil(tex->format)) + continue; + + for (level = 0; level <= tex->last_level; level++) + if (r300_texture(tex)->zmask_in_use[level]) { + /* We don't handle 3D textures and cubemaps yet. */ + r300_flush_depth_stencil(&r300->context, tex, level, 0); + } + } +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -360,6 +412,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; boolean is_depth; + if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -390,6 +443,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (is_depth) { r300_flush_depth_stencil(pipe, src, src_level, src_box->z); } + if (old_format != new_format) { r300_texture_reinterpret_format(pipe->screen, dst, new_format); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1a14d2b79e5..e09cf87f733 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -656,6 +656,22 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) return (struct r300_fragment_shader*)r300->fs.state; } +static INLINE void r300_mark_atom_dirty(struct r300_context *r300, + struct r300_atom *atom) +{ + atom->dirty = TRUE; + + if (!r300->first_dirty) { + r300->first_dirty = atom; + r300->last_dirty = atom+1; + } else { + if (atom < r300->first_dirty) + r300->first_dirty = atom; + else if (atom+1 > r300->last_dirty) + r300->last_dirty = atom+1; + } +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); @@ -672,10 +688,7 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer); +void r300_flush_depth_textures(struct r300_context *r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -710,21 +723,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change); void r300_mark_fs_code_dirty(struct r300_context *r300); -static INLINE void r300_mark_atom_dirty(struct r300_context *r300, - struct r300_atom *atom) -{ - atom->dirty = TRUE; - - if (!r300->first_dirty) { - r300->first_dirty = atom; - r300->last_dirty = atom+1; - } else { - if (atom < r300->first_dirty) - r300->first_dirty = atom; - if (atom+1 > r300->last_dirty) - r300->last_dirty = atom+1; - } -} +/* r300_state_derived.c */ +void r300_update_derived_state(struct r300_context* r300); /* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 52031dd97b5..d6aa90bd053 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -41,6 +41,7 @@ static const struct debug_named_value debug_options[] = { { "fb", DBG_FB, "Log framebuffer" }, { "cbzb", DBG_CBZB, "Log fast color clear info" }, { "hyperz", DBG_HYPERZ, "Log HyperZ info" }, + { "upload", DBG_UPLOAD, "Log user buffer upload info" }, { "scissor", DBG_SCISSOR, "Log scissor info" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" }, { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index e660ca68f1b..b35822c82f8 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -39,7 +39,6 @@ #include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" -#include "r300_state_derived.h" #include <limits.h> @@ -569,8 +568,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, &start, count); - r300_update_derived_state(r300); - /* Fallback for misaligned ushort indices. */ if (indexSize == 2 && (start & 1) && !r300_is_user_buffer(indexBuffer)) { @@ -643,8 +640,6 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; - r300_update_derived_state(r300); - if (immd_is_good_idea(r300, count)) { r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { @@ -720,6 +715,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + /* Set up the fallback for an incompatible vertex layout if needed. */ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { r300_begin_vertex_translate(r300, real_min_index, real_max_index); @@ -737,6 +734,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, min_index = MAX2(min_index, info->start); max_index = MIN2(max_index, info->start + count - 1); + r300_update_derived_state(r300); + /* Set up the fallback for an incompatible vertex layout if needed. */ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { r300_begin_vertex_translate(r300, min_index, max_index); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 5847fe1ffc8..752f53b7579 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -93,6 +93,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_CBZB (1 << 11) #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) +#define DBG_UPLOAD (1 << 14) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index e3cf45479fd..cc3c1d7687e 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -87,13 +87,14 @@ void r300_upload_user_buffers(struct r300_context *r300, int i, nr = r300->velems->count; unsigned count = max_index + 1 - min_index; boolean flushed; + boolean uploaded[16] = {0}; for (i = 0; i < nr; i++) { unsigned index = r300->velems->velem[i].vertex_buffer_index; struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index]; struct r300_buffer *userbuf = r300_buffer(vb->buffer); - if (userbuf && userbuf->user_buffer) { + if (userbuf && userbuf->user_buffer && !uploaded[index]) { unsigned first, size; if (vb->stride) { @@ -104,6 +105,12 @@ void r300_upload_user_buffers(struct r300_context *r300, size = r300->velems->hw_format_size[i]; } + DBG(r300, DBG_UPLOAD, + "Uploading %i bytes, index: %i, buffer: %p, userptr: %p " + "offset: %i, stride: %i.\n", + size, index, userbuf, userbuf->user_buffer, + vb->buffer_offset, vb->stride); + u_upload_data(r300->upload_vb, first, size, userbuf->user_buffer + first, &vb->buffer_offset, @@ -118,10 +125,12 @@ void r300_upload_user_buffers(struct r300_context *r300, r300->upload_vb_validated = FALSE; r300->validate_buffers = TRUE; } + uploaded[index] = TRUE; } else { assert(r300->valid_vertex_buffer[index]); } } + DBG(r300, DBG_UPLOAD, "-------\n"); } static void r300_buffer_destroy(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index d3985c11aa8..95be7849f8f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -32,7 +32,6 @@ #include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" @@ -863,39 +862,6 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -/* We can't use compressed zbuffers as samplers. */ -static void r300_flush_depth_textures(struct r300_context *r300) -{ - struct r300_textures_state *state = - (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; - unsigned count = MIN2(state->sampler_view_count, - state->sampler_state_count); - - if (r300->z_decomp_rd) - return; - - for (i = 0; i < count; i++) - if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; - - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; - - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } - } -} - void r300_update_derived_state(struct r300_context* r300) { r300_flush_depth_textures(r300); diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h deleted file mode 100644 index 71a4a47b003..00000000000 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_STATE_DERIVED_H -#define R300_STATE_DERIVED_H - -struct r300_context; - -void r300_update_derived_state(struct r300_context* r300); - -#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 335f282b06f..a852bef6156 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -248,10 +248,7 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; - unsigned fence; struct list_head fenced_bo; - unsigned *cfence; - struct r600_bo *fence_bo; }; struct r600_draw { |