summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c4
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h5
-rw-r--r--src/gallium/drivers/i965/brw_clip.c6
-rw-r--r--src/gallium/drivers/i965/brw_clip.h2
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c14
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c8
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c12
-rw-r--r--src/gallium/drivers/i965/brw_context.c9
-rw-r--r--src/gallium/drivers/i965/brw_context.h15
-rw-r--r--src/gallium/drivers/i965/brw_defines.h361
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c179
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h6
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c2
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c158
-rw-r--r--src/gallium/drivers/i965/brw_gs.c4
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c2
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c14
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c2
-rw-r--r--src/gallium/drivers/i965/brw_pipe_surface.c21
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c2
-rw-r--r--src/gallium/drivers/i965/brw_reg.h52
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture.c2
-rw-r--r--src/gallium/drivers/i965/brw_resource_texture_layout.c2
-rw-r--r--src/gallium/drivers/i965/brw_screen.c58
-rw-r--r--src/gallium/drivers/i965/brw_screen.h6
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c4
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c4
-rw-r--r--src/gallium/drivers/i965/brw_structs.h238
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c2
-rw-r--r--src/gallium/drivers/i965/brw_urb.c4
-rw-r--r--src/gallium/drivers/i965/brw_vs.h1
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c17
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c8
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h4
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c12
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c6
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c10
-rw-r--r--src/gallium/drivers/i965/intel_decode.c563
-rw-r--r--src/gallium/drivers/i965/intel_decode.h2
-rw-r--r--src/gallium/drivers/noop/noop_pipe.c77
-rw-r--r--src/gallium/drivers/noop/noop_public.h5
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c9
-rw-r--r--src/gallium/drivers/nvc0/Makefile3
-rw-r--r--src/gallium/drivers/nvc0/SConscript3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3d.xml.h93
-rw-r--r--src/gallium/drivers/nvc0/nvc0_buffer.c3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c1
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.c3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push.c24
-rw-r--r--src/gallium/drivers/nvc0/nvc0_query.c337
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h7
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c3
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.c2
-rw-r--r--src/gallium/drivers/r300/r300_blit.c62
-rw-r--r--src/gallium/drivers/r300/r300_context.h38
-rw-r--r--src/gallium/drivers/r300/r300_debug.c1
-rw-r--r--src/gallium/drivers/r300/r300_render.c9
-rw-r--r--src/gallium/drivers/r300/r300_screen.h1
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c11
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c34
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h30
-rw-r--r--src/gallium/drivers/r600/r600.h3
66 files changed, 1967 insertions, 628 deletions
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index e80067f3b19..3c935792465 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -64,13 +64,11 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
}
struct brw_batchbuffer *
-brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
- struct brw_chipset chipset)
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
{
struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
batch->sws = sws;
- batch->chipset = chipset;
brw_batchbuffer_reset(batch);
return batch;
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 6ca9f617f5e..6ecb91857dd 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -26,7 +26,6 @@ struct brw_batchbuffer {
struct brw_winsys_screen *sws;
struct brw_winsys_buffer *buf;
- struct brw_chipset chipset;
/**
* Values exported to speed up the writing the batchbuffer,
@@ -47,8 +46,8 @@ struct brw_batchbuffer {
/*@}*/
};
-struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
- struct brw_chipset chipset );
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws );
+
void brw_batchbuffer_free(struct brw_batchbuffer *batch);
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index ccba205e8c7..66b13ea58e2 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -66,16 +66,14 @@ compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
- c.chipset = brw->chipset;
c.key = *key;
- c.need_ff_sync = c.chipset.is_igdng;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- if (c.chipset.is_igdng)
+ if (brw->gen == 5)
delta = 3 * REG_SIZE;
else
delta = REG_SIZE;
@@ -97,7 +95,7 @@ compile_clip_prog( struct brw_context *brw,
if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 80e3a11a370..f123b73c063 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -125,12 +125,10 @@ struct brw_clip_compile {
GLuint last_tmp;
GLboolean need_direction;
- struct brw_chipset chipset;
GLuint last_mrf;
GLuint header_position_offset;
- GLboolean need_ff_sync;
GLuint nr_color_attrs;
GLuint offset_color0;
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index 66caadc4d53..4ed7362171b 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -32,6 +32,7 @@
#include "util/u_debug.h"
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -41,7 +42,7 @@
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
GLuint i = 0,j;
-
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
@@ -79,7 +80,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -120,6 +121,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
static void clip_and_emit_line( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
@@ -146,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -183,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -208,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -223,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_ENDIF(p, is_neg2);
}
}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 5c3ccfd8d0d..f56edf3177c 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -109,7 +109,7 @@ clip_unit_create_from_key(struct brw_context *brw,
/* Although up to 16 concurrent Clip threads are allowed on IGDNG,
* only 2 threads can output VUEs at a time.
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
clip.thread4.max_threads = 16 - 1;
else
clip.thread4.max_threads = 2 - 1;
@@ -134,7 +134,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_G4X(brw))
+ if (brw->is_g4x)
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index 069524bc14f..7d400e6028b 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -30,6 +30,7 @@
*/
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -43,6 +44,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
GLuint i = 0,j;
+ struct brw_context *brw = c->func.brw;
/* Register usage is static, precompute here:
*/
@@ -69,7 +71,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
for (j = 0; j < 3; j++) {
GLuint delta = c->key.nr_attrs*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = c->key.nr_attrs * 16 + 32 * 3;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -110,7 +112,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
- if (c->need_ff_sync) {
+ if (brw->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -563,7 +565,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (c->chipset.is_965) {
+ if (p->brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 23e51ee9bcd..5713f25da7c 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -31,6 +31,7 @@
#include "brw_defines.h"
+#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
@@ -126,6 +127,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -142,7 +144,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->key.nr_attrs; i++) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
if (delta == c->offset_edgeflag) {
@@ -176,7 +178,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
delta = i * 16 + 32 * 3;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -350,7 +352,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
struct brw_instruction *need_ff_sync;
@@ -379,7 +382,8 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct brw_context *brw = c->func.brw;
+ if (brw->needs_ff_sync) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index a2736f783d5..41a468a32f0 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -107,7 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
void *priv)
{
struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
-
+ struct brw_screen *brs = brw_screen(screen);
if (!brw) {
debug_printf("%s: failed to alloc context\n", __FUNCTION__);
return NULL;
@@ -117,7 +117,10 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
brw->base.priv = priv;
brw->base.destroy = brw_destroy_context;
brw->sws = brw_screen(screen)->sws;
- brw->chipset = brw_screen(screen)->chipset;
+ brw->is_g4x = brs->is_g4x;
+ brw->needs_ff_sync = brs->needs_ff_sync;
+ brw->has_negative_rhw_bug = brs->has_negative_rhw_bug;
+ brw->gen = brs->gen;
brw_init_resource_functions( brw );
brw_pipe_blend_init( brw );
@@ -145,7 +148,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen,
make_empty_list(&brw->query.active_head);
- brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+ brw->batch = brw_batchbuffer_alloc( brw->sws );
if (brw->batch == NULL)
goto fail;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index d927f382d5f..45fc26dd7d8 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -529,7 +529,14 @@ struct brw_query_object {
struct brw_context
{
struct pipe_context base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+
+ int urb_size;
+ int vs_max_threads;
+ int wm_max_threads;
struct brw_winsys_screen *sws;
@@ -854,11 +861,5 @@ brw_context( struct pipe_context *ctx )
return (struct brw_context *)ctx;
}
-
-#define BRW_IS_965(brw) ((brw)->chipset.is_965)
-#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
-#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)
-
-
#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index e201ce4d7ce..7547eae97c7 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -463,6 +463,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -502,6 +509,27 @@
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about. Now, there
+ * are two contributors ANDed together to whether channels are
+ * executed: The predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value. It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL 0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set. Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL 1
+/** @} */
+
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
@@ -531,6 +559,8 @@
#define BRW_OPCODE_POP 47
#define BRW_OPCODE_WAIT 48
#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_SENDC 50
+#define BRW_OPCODE_MATH 56
#define BRW_OPCODE_ADD 64
#define BRW_OPCODE_MUL 65
#define BRW_OPCODE_AVG 66
@@ -550,6 +580,7 @@
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_PLN 90
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0
@@ -599,6 +630,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
@@ -645,13 +678,14 @@
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
-#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
-#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
-#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
@@ -674,20 +708,15 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
-
-/* for IGDNG only */
+#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5 0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6
+
+/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
@@ -705,10 +734,24 @@
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
@@ -728,6 +771,16 @@
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* GEN6 */
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
@@ -736,7 +789,8 @@
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
@@ -787,17 +841,33 @@
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
#define CMD_VERTEX_BUFFER 0x7808
# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define BRW_VB0_PITCH_SHIFT 0
#define CMD_VERTEX_ELEMENT 0x7809
# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
@@ -816,6 +886,236 @@
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
+#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+
+#define CMD_URB 0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_URB_GS_SIZE_SHIFT 0
+
+#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE (1 << 15)
+
+#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip aganist. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
+
+#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
@@ -827,6 +1127,25 @@
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define CMD_AA_LINE_PARAMETERS 0x790a
+#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_3D_PRIM 0x7b00
@@ -839,8 +1158,8 @@
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
-#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
- (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+#define URB_SIZES(brw) (brw->gen == 5 ? 1024 : \
+ (brw->is_g4x ? 384 : 256)) /* 512 bit units */
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 28c83515ba9..b093569f0cf 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -49,12 +49,14 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
@@ -69,13 +71,14 @@ struct {
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
@@ -144,7 +147,6 @@ char *chan_sel[4] = {
};
char *dest_condmod[16] = {
- [0] = NULL
};
char *debug_ctrl[2] = {
@@ -157,6 +159,16 @@ char *saturate[2] = {
[1] = ".sat"
};
+char *accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+char *wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
char *exec_size[8] = {
[0] = "1",
[1] = "2",
@@ -204,6 +216,7 @@ char *compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
+ [3] = "compr4",
};
char *dep_ctrl[4] = {
@@ -233,6 +246,16 @@ char *reg_encoding[8] = {
[7] = "F"
};
+int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
char *imm_encoding[8] = {
[0] = "UD",
[1] = "D",
@@ -321,6 +344,11 @@ char *math_precision[2] = {
[1] = "partial_precision"
};
+char *urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
@@ -416,6 +444,11 @@ static int print_opcode (FILE *file, int id)
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
+
+ /* Clear the Compr4 instruction compression bit. */
+ if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ _reg_nr &= ~(1 << 7);
+
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
@@ -427,6 +460,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
+ case BRW_ARF_FLAG:
+ format (file, "f%d", _reg_nr & 0x0f);
+ break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
@@ -457,7 +493,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
return err;
}
-static int dest (FILE *file, const struct brw_instruction *inst)
+static int dest (FILE *file, struct brw_instruction *inst)
{
int err = 0;
@@ -469,7 +505,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -477,7 +514,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -493,7 +531,8 @@ static int dest (FILE *file, const struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -534,7 +573,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -588,11 +627,12 @@ static int src_da16 (FILE *file,
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
- err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ string (file, ",4,1>");
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -619,11 +659,12 @@ static int src_da16 (FILE *file,
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
return err;
}
-static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
format (file, "0x%08xUD", inst->bits3.ud);
@@ -652,7 +693,7 @@ static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
return 0;
}
-static int src0 (FILE *file, const struct brw_instruction *inst)
+static int src0 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
@@ -712,7 +753,7 @@ static int src0 (FILE *file, const struct brw_instruction *inst)
}
}
-static int src1 (FILE *file, const struct brw_instruction *inst)
+static int src1 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
@@ -772,7 +813,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst)
}
}
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -822,7 +863,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= src1 (file, inst);
}
- if (inst->header.opcode == BRW_OPCODE_SEND) {
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC) {
newline (file);
pad (file, 16);
space = 0;
@@ -842,24 +884,70 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.math.precision, &space);
break;
case BRW_MESSAGE_TARGET_SAMPLER:
- format (file, " (%d, %d, ",
- inst->bits3.sampler.binding_table_index,
- inst->bits3.sampler.sampler);
- err |= control (file, "sampler target format", sampler_target_format,
- inst->bits3.sampler.return_format, NULL);
- string (file, ")");
+ if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
case BRW_MESSAGE_TARGET_URB:
- format (file, " %d", inst->bits3.urb.offset);
- space = 1;
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
@@ -868,6 +956,11 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
+ if (gen >= 5) {
+ format (file, " mlen %d, rlen %d\n",
+ inst->bits3.urb_gen5.msg_length,
+ inst->bits3.urb_gen5.response_length);
+ }
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
@@ -877,10 +970,17 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
@@ -891,7 +991,8 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
- if (inst->header.opcode == BRW_OPCODE_SEND)
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
@@ -905,13 +1006,13 @@ int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count)
+ struct brw_instruction *inst,
+ unsigned count, int gen)
{
int i, err;
for (i = 0; i < count; i++) {
- err = brw_disasm_insn(stderr, &inst[i]);
+ err = brw_disasm_insn(stderr, &inst[i], gen);
if (err)
return err;
}
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
index ba5b109c483..ce451ed5a06 100644
--- a/src/gallium/drivers/i965/brw_disasm.h
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -27,10 +27,10 @@
struct brw_instruction;
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+int brw_disasm_insn (FILE *file, struct brw_instruction *inst, int gen);
int brw_disasm (FILE *file,
- const struct brw_instruction *inst,
- unsigned count);
+ struct brw_instruction *inst,
+ unsigned count, int gen);
#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index cf9405470c8..04ec5c81a6b 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -170,7 +170,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].offset);
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
OUT_RELOC(brw->vb.vb[i].bo,
BRW_USAGE_VERTEX,
brw->vb.vb[i].bo->size - 1);
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 00d8eaccbc4..ba1159e4c32 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -255,19 +255,19 @@ static void brw_set_math_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.math_igdng.function = function;
- insn->bits3.math_igdng.int_type = integer_type;
- insn->bits3.math_igdng.precision = low_precision;
- insn->bits3.math_igdng.saturate = saturate;
- insn->bits3.math_igdng.data_type = dataType;
- insn->bits3.math_igdng.snapshot = 0;
- insn->bits3.math_igdng.header_present = 0;
- insn->bits3.math_igdng.response_length = response_length;
- insn->bits3.math_igdng.msg_length = msg_length;
- insn->bits3.math_igdng.end_of_thread = 0;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
- insn->bits2.send_igdng.end_of_thread = 0;
+ if (brw->gen == 5) {
+ insn->bits3.math_gen5.function = function;
+ insn->bits3.math_gen5.int_type = integer_type;
+ insn->bits3.math_gen5.precision = low_precision;
+ insn->bits3.math_gen5.saturate = saturate;
+ insn->bits3.math_gen5.data_type = dataType;
+ insn->bits3.math_gen5.snapshot = 0;
+ insn->bits3.math_gen5.header_present = 0;
+ insn->bits3.math_gen5.response_length = response_length;
+ insn->bits3.math_gen5.msg_length = msg_length;
+ insn->bits3.math_gen5.end_of_thread = 0;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_gen5.end_of_thread = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
@@ -295,18 +295,18 @@ static void brw_set_ff_sync_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.urb_igdng.opcode = 1;
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used;
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ insn->bits3.urb_gen5.opcode = 1;
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used;
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
}
static void brw_set_urb_message( struct brw_context *brw,
@@ -322,19 +322,19 @@ static void brw_set_urb_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.urb_igdng.opcode = 0; /* ? */
- insn->bits3.urb_igdng.offset = offset;
- insn->bits3.urb_igdng.swizzle_control = swizzle_control;
- insn->bits3.urb_igdng.allocate = allocate;
- insn->bits3.urb_igdng.used = used; /* ? */
- insn->bits3.urb_igdng.complete = complete;
- insn->bits3.urb_igdng.header_present = 1;
- insn->bits3.urb_igdng.response_length = response_length;
- insn->bits3.urb_igdng.msg_length = msg_length;
- insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.urb_gen5.opcode = 0; /* ? */
+ insn->bits3.urb_gen5.offset = offset;
+ insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.allocate = allocate;
+ insn->bits3.urb_gen5.used = used; /* ? */
+ insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.header_present = 1;
+ insn->bits3.urb_gen5.response_length = response_length;
+ insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
@@ -361,18 +361,18 @@ static void brw_set_dp_write_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_write_igdng.msg_control = msg_control;
- insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_write_igdng.msg_type = msg_type;
- insn->bits3.dp_write_igdng.send_commit_msg = 0;
- insn->bits3.dp_write_igdng.header_present = 1;
- insn->bits3.dp_write_igdng.response_length = response_length;
- insn->bits3.dp_write_igdng.msg_length = msg_length;
- insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_gen5.msg_control = msg_control;
+ insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_gen5.msg_type = msg_type;
+ insn->bits3.dp_write_gen5.send_commit_msg = 0;
+ insn->bits3.dp_write_gen5.header_present = 1;
+ insn->bits3.dp_write_gen5.response_length = response_length;
+ insn->bits3.dp_write_gen5.msg_length = msg_length;
+ insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
@@ -398,18 +398,18 @@ static void brw_set_dp_read_message( struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
- insn->bits3.dp_read_igdng.msg_control = msg_control;
- insn->bits3.dp_read_igdng.msg_type = msg_type;
- insn->bits3.dp_read_igdng.target_cache = target_cache;
- insn->bits3.dp_read_igdng.header_present = 1;
- insn->bits3.dp_read_igdng.response_length = response_length;
- insn->bits3.dp_read_igdng.msg_length = msg_length;
- insn->bits3.dp_read_igdng.pad1 = 0;
- insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (brw->gen == 5) {
+ insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_gen5.msg_control = msg_control;
+ insn->bits3.dp_read_gen5.msg_type = msg_type;
+ insn->bits3.dp_read_gen5.target_cache = target_cache;
+ insn->bits3.dp_read_gen5.header_present = 1;
+ insn->bits3.dp_read_gen5.response_length = response_length;
+ insn->bits3.dp_read_gen5.msg_length = msg_length;
+ insn->bits3.dp_read_gen5.pad1 = 0;
+ insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
@@ -437,18 +437,18 @@ static void brw_set_sampler_message(struct brw_context *brw,
assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
- insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
- insn->bits3.sampler_igdng.sampler = sampler;
- insn->bits3.sampler_igdng.msg_type = msg_type;
- insn->bits3.sampler_igdng.simd_mode = simd_mode;
- insn->bits3.sampler_igdng.header_present = header_present;
- insn->bits3.sampler_igdng.response_length = response_length;
- insn->bits3.sampler_igdng.msg_length = msg_length;
- insn->bits3.sampler_igdng.end_of_thread = eot;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
- insn->bits2.send_igdng.end_of_thread = eot;
- } else if (BRW_IS_G4X(brw)) {
+ if (brw->gen == 5) {
+ insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+ insn->bits3.sampler_gen5.sampler = sampler;
+ insn->bits3.sampler_gen5.msg_type = msg_type;
+ insn->bits3.sampler_gen5.simd_mode = simd_mode;
+ insn->bits3.sampler_gen5.header_present = header_present;
+ insn->bits3.sampler_gen5.response_length = response_length;
+ insn->bits3.sampler_gen5.msg_length = msg_length;
+ insn->bits3.sampler_gen5.end_of_thread = eot;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_gen5.end_of_thread = eot;
+ } else if (brw->is_g4x) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -478,7 +478,7 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
{
if (p->nr_insn)
- brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+ brw_disasm_insn(stderr, &p->store[p->nr_insn-1], p->brw->gen);
}
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
@@ -658,7 +658,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -699,7 +699,7 @@ void brw_ENDIF(struct brw_compile *p,
{
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow) {
@@ -813,7 +813,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
br = 2;
if (p->single_program_flow)
@@ -856,7 +856,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *landing = &p->store[p->nr_insn];
GLuint jmpi = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 06826635a8a..2a8165b83ee 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -51,13 +51,13 @@ static enum pipe_error compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
- c.need_ff_sync = BRW_IS_IGDNG(brw);
+ c.need_ff_sync = brw->gen == 5;
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = c.key.nr_attrs;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index b64ec286cea..6e070f6d756 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -103,7 +103,7 @@ gs_unit_create_from_key(struct brw_context *brw,
else
gs.thread4.max_threads = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
gs.thread4.rendering_enable = 1;
if (BRW_DEBUG & DEBUG_STATS)
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index c635d696617..d53ce6ccfd4 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -239,7 +239,7 @@ static int prepare_depthbuffer(struct brw_context *brw)
static int emit_depthbuffer(struct brw_context *brw)
{
struct pipe_surface *surface = brw->curr.fb.zsbuf;
- unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+ unsigned int len = (brw->is_g4x || brw->gen == 5) ? 6 : 5;
if (surface == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -250,7 +250,7 @@ static int emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -298,7 +298,7 @@ static int emit_depthbuffer(struct brw_context *brw)
((surface->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -374,7 +374,7 @@ static int upload_invariant_state( struct brw_context *brw )
struct brw_pipeline_select ps;
memset(&ps, 0, sizeof(ps));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
else
ps.header.opcode = CMD_PIPELINE_SELECT_965;
@@ -413,7 +413,7 @@ static int upload_invariant_state( struct brw_context *brw )
struct brw_vf_statistics vfs;
memset(&vfs, 0, sizeof(vfs));
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (brw->is_g4x || brw->gen == 5)
vfs.opcode = CMD_VF_STATISTICS_GM45;
else
vfs.opcode = CMD_VF_STATISTICS_965;
@@ -424,7 +424,7 @@ static int upload_invariant_state( struct brw_context *brw )
BRW_BATCH_STRUCT(brw, &vfs);
}
- if (!BRW_IS_965(brw))
+ if (!(brw->gen == 4))
{
struct brw_aa_line_parameters balp;
@@ -480,7 +480,7 @@ static int upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
BEGIN_BATCH(8, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 4c1a6d7dcdf..c86681d1495 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -35,7 +35,7 @@ calculate_clip_key_rast( const struct brw_context *brw,
{
memset(key, 0, sizeof *key);
- if (brw->chipset.is_igdng)
+ if (brw->gen == 5)
key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key->clip_mode = BRW_CLIPMODE_NORMAL;
diff --git a/src/gallium/drivers/i965/brw_pipe_surface.c b/src/gallium/drivers/i965/brw_pipe_surface.c
index 4deead98b19..58a610089e2 100644
--- a/src/gallium/drivers/i965/brw_pipe_surface.c
+++ b/src/gallium/drivers/i965/brw_pipe_surface.c
@@ -169,20 +169,15 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
surface->ss.ss1.base_addr = surface->offset - tile_offset;
- if (brw_screen->chipset.is_g4x) {
- if (tex->tiling == BRW_TILING_X) {
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
- surface->ss.ss5.y_offset = tile_offset / 512 / 2;
- } else {
- surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+ if (tex->tiling == BRW_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
surface->ss.ss5.y_offset = tile_offset / 128 / 2;
- }
- }
- else {
- assert(tile_offset == 0);
}
}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index e1697687ccc..b23454b5808 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -203,7 +203,7 @@ static void brw_translate_vertex_elements(struct brw_context *brw,
brw_velems->ve[i].ve1.vfcomponent2 = comp2;
brw_velems->ve[i].ve1.vfcomponent3 = comp3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
brw_velems->ve[i].ve1.dst_offset = 0;
else
brw_velems->ve[i].ve1.dst_offset = i * 4;
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
index ba10f9d5df1..53c7c435713 100644
--- a/src/gallium/drivers/i965/brw_reg.h
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -93,18 +93,54 @@
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
+#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
-struct brw_chipset {
- unsigned pci_id:16;
- unsigned is_965:1;
- unsigned is_igdng:1;
- unsigned is_g4x:1;
- unsigned pad:13;
-};
-
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G || \
+ devid == PCI_CHIP_B43_G1)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \
+ devid == PCI_CHIP_I965_Q || \
+ devid == PCI_CHIP_I965_G_1 || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_I946_GZ || \
+ IS_G4X(devid))
+
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_IRONLAKE(devid) IS_GEN5(devid)
+
+#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_S)
+
+#define IS_965(devid) (IS_GEN4(devid) || \
+ IS_G4X(devid) || \
+ IS_GEN5(devid) || \
+ IS_GEN6(devid))
/* XXX: hacks
*/
diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c
index fded2da3820..0cb895f35de 100644
--- a/src/gallium/drivers/i965/brw_resource_texture.c
+++ b/src/gallium/drivers/i965/brw_resource_texture.c
@@ -392,7 +392,7 @@ brw_texture_create( struct pipe_screen *screen,
if (tex->compressed == 0 &&
!bscreen->no_tiling)
{
- if (bscreen->chipset.is_965 &&
+ if (bscreen->gen < 5 &&
util_format_is_depth_or_stencil(template->format))
tex->tiling = BRW_TILING_Y;
else
diff --git a/src/gallium/drivers/i965/brw_resource_texture_layout.c b/src/gallium/drivers/i965/brw_resource_texture_layout.c
index 2187bdd82ce..afecc77e312 100644
--- a/src/gallium/drivers/i965/brw_resource_texture_layout.c
+++ b/src/gallium/drivers/i965/brw_resource_texture_layout.c
@@ -388,7 +388,7 @@ GLboolean brw_texture_layout(struct brw_screen *brw_screen,
{
switch (tex->b.b.target) {
case PIPE_TEXTURE_CUBE:
- if (brw_screen->chipset.is_igdng)
+ if (brw_screen->gen == 5)
brw_layout_cubemap_idgng( tex );
else
brw_layout_3d_cube( tex );
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index f5b75b17e36..bf805fd080c 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -97,7 +97,7 @@ brw_get_name(struct pipe_screen *screen)
static char buffer[128];
const char *chipset;
- switch (brw_screen(screen)->chipset.pci_id) {
+ switch (brw_screen(screen)->pci_id) {
case PCI_CHIP_I965_G:
chipset = "I965_G";
break;
@@ -405,8 +405,6 @@ struct pipe_screen *
brw_screen_create(struct brw_winsys_screen *sws)
{
struct brw_screen *bscreen;
- struct brw_chipset chipset;
-
#ifdef DEBUG
BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
@@ -415,46 +413,30 @@ brw_screen_create(struct brw_winsys_screen *sws)
BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
#endif
- memset(&chipset, 0, sizeof chipset);
-
- chipset.pci_id = sws->pci_id;
-
- switch (chipset.pci_id) {
- case PCI_CHIP_I965_G:
- case PCI_CHIP_I965_Q:
- case PCI_CHIP_I965_G_1:
- case PCI_CHIP_I946_GZ:
- case PCI_CHIP_I965_GM:
- case PCI_CHIP_I965_GME:
- chipset.is_965 = TRUE;
- break;
-
- case PCI_CHIP_GM45_GM:
- case PCI_CHIP_IGD_E_G:
- case PCI_CHIP_Q45_G:
- case PCI_CHIP_G45_G:
- case PCI_CHIP_G41_G:
- case PCI_CHIP_B43_G:
- chipset.is_g4x = TRUE;
- break;
-
- case PCI_CHIP_ILD_G:
- case PCI_CHIP_ILM_G:
- chipset.is_igdng = TRUE;
- break;
+ bscreen = CALLOC_STRUCT(brw_screen);
+ if (!bscreen)
+ return NULL;
- default:
+ bscreen->pci_id = sws->pci_id;
+ if (IS_GEN6(sws->pci_id)) {
+ bscreen->gen = 6;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_GEN5(sws->pci_id)) {
+ bscreen->gen = 5;
+ bscreen->needs_ff_sync = TRUE;
+ } else if (IS_965(sws->pci_id)) {
+ bscreen->gen = 4;
+ if (IS_G4X(sws->pci_id)) {
+ bscreen->is_g4x = true;
+ }
+ } else {
debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
- __FUNCTION__, chipset.pci_id);
+ __FUNCTION__, sws->pci_id);
+ free(bscreen);
return NULL;
}
-
- bscreen = CALLOC_STRUCT(brw_screen);
- if (!bscreen)
- return NULL;
-
- bscreen->chipset = chipset;
+ sws->gen = bscreen->gen;
bscreen->sws = sws;
bscreen->base.winsys = NULL;
bscreen->base.destroy = brw_destroy_screen;
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 58e293bc76f..a62e1afc405 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -43,7 +43,11 @@ struct brw_winsys_screen;
struct brw_screen
{
struct pipe_screen base;
- struct brw_chipset chipset;
+ int gen;
+ boolean has_negative_rhw_bug;
+ boolean needs_ff_sync;
+ boolean is_g4x;
+ int pci_id;
struct brw_winsys_screen *sws;
boolean no_tiling;
};
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 497634ec9ed..901c3341642 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -161,7 +161,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
@@ -205,7 +205,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (p->brw->gen == 5)
jmpi = 2;
brw_push_insn_state(p);
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 6c299a86b49..eec024650ce 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -148,7 +148,7 @@ sf_unit_create_from_key(struct brw_context *brw,
sf.thread3.dispatch_grf_start_reg = 3;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
sf.thread3.urb_entry_read_offset = 3;
else
sf.thread3.urb_entry_read_offset = 1;
@@ -161,7 +161,7 @@ sf_unit_create_from_key(struct brw_context *brw,
/* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
* 48(IGDNG) threads
*/
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 48;
else
chipset_max_threads = 24;
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index e97ddeb5e1c..b0d75b4f828 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -279,7 +279,7 @@ struct brw_aa_line_parameters
struct header header;
struct {
- GLuint aa_coverage_scope:8;
+ GLuint aa_coverage_slope:8;
GLuint pad0:8;
GLuint aa_coverage_bias:8;
GLuint pad1:8;
@@ -659,7 +659,105 @@ struct brw_clip_unit_state
GLfloat viewport_ymax;
};
+struct gen6_blend_state
+{
+ struct {
+ GLuint dest_blend_factor:5;
+ GLuint source_blend_factor:5;
+ GLuint pad3:1;
+ GLuint blend_func:3;
+ GLuint pad2:1;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_source_blend_factor:5;
+ GLuint pad1:1;
+ GLuint ia_blend_func:3;
+ GLuint pad0:1;
+ GLuint ia_blend_enable:1;
+ GLuint blend_enable:1;
+ } blend0;
+
+ struct {
+ GLuint post_blend_clamp_enable:1;
+ GLuint pre_blend_clamp_enable:1;
+ GLuint clamp_range:2;
+ GLuint pad0:4;
+ GLuint x_dither_offset:2;
+ GLuint y_dither_offset:2;
+ GLuint dither_enable:1;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test_enable:1;
+ GLuint pad1:1;
+ GLuint logic_op_func:4;
+ GLuint logic_op_enable:1;
+ GLuint pad2:1;
+ GLuint write_disable_b:1;
+ GLuint write_disable_g:1;
+ GLuint write_disable_r:1;
+ GLuint write_disable_a:1;
+ GLuint pad3:1;
+ GLuint alpha_to_coverage_dither:1;
+ GLuint alpha_to_one:1;
+ GLuint alpha_to_coverage:1;
+ } blend1;
+};
+struct gen6_color_calc_state
+{
+ struct {
+ GLuint alpha_test_format:1;
+ GLuint pad0:14;
+ GLuint round_disable:1;
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_ref:8;
+ } cc0;
+
+ union {
+ GLfloat alpha_ref_f;
+ struct {
+ GLuint ui:8;
+ GLuint pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ GLfloat constant_r;
+ GLfloat constant_g;
+ GLfloat constant_b;
+ GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } ds0;
+
+ struct {
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ GLuint pad0:26;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_func:3;
+ GLuint pad1:1;
+ GLuint depth_test_enable:1;
+ } ds2;
+};
struct brw_cc_unit_state
{
@@ -814,6 +912,13 @@ struct brw_sf_unit_state
};
+struct gen6_scissor_rect
+{
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+};
struct brw_gs_unit_state
{
@@ -825,7 +930,7 @@ struct brw_gs_unit_state
struct
{
GLuint pad0:8;
- GLuint rendering_enable:1; /* for IGDNG */
+ GLuint rendering_enable:1; /* for Ironlake */
GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
@@ -935,7 +1040,7 @@ struct brw_wm_unit_state
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
- /* for IGDNG only */
+ /* for Ironlake only */
struct {
GLuint pad0:1;
GLuint grf_reg_count_1:3;
@@ -962,6 +1067,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -973,7 +1087,7 @@ struct brw_sampler_state
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
- GLuint pad:1;
+ GLuint min_mag_neq:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
@@ -985,7 +1099,8 @@ struct brw_sampler_state
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
- GLuint pad:3;
+ GLuint cube_control_mode:1;
+ GLuint pad:2;
GLuint max_lod:10;
GLuint min_lod:10;
} ss1;
@@ -999,7 +1114,9 @@ struct brw_sampler_state
struct brw_ss3
{
- GLuint pad:19;
+ GLuint non_normalized_coord:1;
+ GLuint pad:12;
+ GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
@@ -1044,6 +1161,15 @@ struct brw_sf_viewport
} scissor;
};
+struct gen6_sf_viewport {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+};
+
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct brw_surface_state
@@ -1055,7 +1181,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: instead of replicating one of the texels */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1202,7 +1333,8 @@ struct brw_instruction
GLuint predicate_inverse:1;
GLuint execution_size:3;
GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
- GLuint pad0:2;
+ GLuint acc_wr_control:1;
+ GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
@@ -1250,7 +1382,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
@@ -1264,9 +1396,21 @@ struct brw_instruction
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
+
+ struct {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+
+ GLint jump_count:16;
+ } branch_gen6;
} bits1;
@@ -1339,7 +1483,7 @@ struct brw_instruction
GLuint end_of_thread:1;
GLuint pad1:1;
GLuint sfid:4;
- } send_igdng; /* for IGDNG only */
+ } send_gen5; /* for Ironlake only */
} bits2;
@@ -1413,6 +1557,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
@@ -1440,7 +1599,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } math_igdng;
+ } math_gen5;
struct {
GLuint binding_table_index:8;
@@ -1476,7 +1635,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } sampler_igdng;
+ } sampler_gen5;
struct brw_urb_immediate urb;
@@ -1494,7 +1653,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } urb_igdng;
+ } urb_gen5;
struct {
GLuint binding_table_index:8;
@@ -1510,6 +1669,18 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
@@ -1519,7 +1690,7 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_read_igdng;
+ } dp_read_gen5;
struct {
GLuint binding_table_index:8;
@@ -1546,10 +1717,38 @@ struct brw_instruction
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } dp_write_igdng;
+ } dp_write_gen5;
+
+ /* Sandybridge DP for sample cache, constant cache, render cache */
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:5;
+ GLuint msg_type:3;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_sampler_const_cache;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint slot_group_select:1;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:4;
+ GLuint send_commit_msg:1;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_render_cache;
struct {
- GLuint pad:16;
+ GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
@@ -1557,14 +1756,15 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ /* Of this struct, only end_of_thread is not present for gen6. */
struct {
- GLuint pad:19;
+ GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
- } generic_igdng;
+ } generic_gen5;
GLint d;
GLuint ud;
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
index cd40fc6d618..f3de2f995b3 100644
--- a/src/gallium/drivers/i965/brw_structs_dump.c
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -72,7 +72,7 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
{
debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
- debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope);
+ debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_slope);
debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias);
debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope);
debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias);
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 907ec56c6ca..b630752809e 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -147,7 +147,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.constrained = 0;
- if (BRW_IS_IGDNG(brw)) {
+ if (brw->gen == 5) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
@@ -157,7 +157,7 @@ static int recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
- } else if (BRW_IS_G4X(brw)) {
+ } else if (brw->is_g4x) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 944d88c84cc..b6d1091618e 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -56,7 +56,6 @@ struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
- struct brw_chipset chipset;
struct brw_vertex_shader *vp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 5dcbd597ddc..559f0c61d8d 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -116,6 +116,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
+ struct brw_context *brw = c->func.brw;
GLuint i, reg = 0, subreg = 0, mrf;
int attributes_in_vue;
@@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
c->nr_outputs = c->prog_data.nr_outputs;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
mrf = 8;
else
mrf = 4;
@@ -333,7 +334,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -1124,6 +1125,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_vertex_write( struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
@@ -1143,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
if (c->prog_data.writes_psiz ||
c->key.nr_userclip ||
- c->chipset.is_965)
+ brw->has_negative_rhw_bug)
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1174,7 +1176,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (c->chipset.is_965) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1202,7 +1204,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- if (c->chipset.is_igdng) {
+ if (brw->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
/* m4, m5 contain the distances from vertex to the user clip planeXXX.
@@ -1339,6 +1341,7 @@ static void emit_insn(struct brw_vs_compile *c,
unsigned opcode = inst->Instruction.Opcode;
unsigned label = inst->Label.Label;
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_reg args[3], dst;
GLuint i;
@@ -1514,7 +1517,7 @@ static void emit_insn(struct brw_vs_compile *c,
c->loop_depth--;
- if (c->chipset.is_igdng)
+ if (brw->gen == 5)
br = 2;
inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
@@ -1652,6 +1655,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
if (BRW_DEBUG & DEBUG_VS) {
debug_printf("vs-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index dadbb622e4d..6d2ccfd6d98 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -100,7 +100,7 @@ vs_unit_create_from_key(struct brw_context *brw,
*/
vs.thread1.single_program_flow = 0;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -111,16 +111,16 @@ vs_unit_create_from_key(struct brw_context *brw,
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
else
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
chipset_max_threads = 72;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
chipset_max_threads = 32;
else
chipset_max_threads = 16;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index a06f8bb7d61..038f6f788a0 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -148,7 +148,7 @@ static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
struct brw_winsys_screen {
unsigned pci_id;
-
+ int gen;
/**
* Buffer functions.
*/
@@ -282,7 +282,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size );
+ size_t size, int gen );
#endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
index f8f6a539bc9..b66b1cfccb6 100644
--- a/src/gallium/drivers/i965/brw_winsys_debug.c
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -9,7 +9,7 @@ void brw_dump_data( unsigned pci_id,
enum brw_buffer_data_type data_type,
unsigned offset,
const void *data,
- size_t size )
+ size_t size, int gen )
{
if (BRW_DUMP & DUMP_ASM) {
switch (data_type) {
@@ -18,7 +18,7 @@ void brw_dump_data( unsigned pci_id,
case BRW_DATA_GS_VS_PROG:
case BRW_DATA_GS_GS_PROG:
case BRW_DATA_GS_CLIP_PROG:
- brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+ brw_disasm( stderr, (struct brw_instruction *)data, size / sizeof(struct brw_instruction), gen );
break;
default:
break;
@@ -77,7 +77,7 @@ void brw_dump_data( unsigned pci_id,
if (BRW_DUMP & DUMP_BATCH) {
switch (data_type) {
case BRW_DATA_BATCH_BUFFER:
- intel_decode(data, size / 4, offset, pci_id);
+ intel_decode(data, size / 4, offset, pci_id, 0);
break;
default:
break;
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 8f983a60ae8..6301062fd79 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -848,11 +848,11 @@ static void emit_tex( struct brw_wm_compile *c,
responseLength = 8; /* always */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
} else {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
@@ -917,8 +917,8 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(8), coord[3]);
msgLength = 9;
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ if (p->brw->gen == 5)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1516,6 +1516,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
if (BRW_DEBUG & DEBUG_WM) {
debug_printf("wm-native:\n");
- brw_disasm(stderr, p->store, p->nr_insn);
+ brw_disasm(stderr, p->store, p->nr_insn, p->brw->gen);
}
}
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 3b3afc39d3c..fb8e40d928e 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1607,7 +1607,7 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
} else {
/* Does it work well on SIMD8? */
@@ -1688,7 +1688,7 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
- if (BRW_IS_IGDNG(p->brw)) {
+ if (p->brw->gen == 5) {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
else
@@ -1970,7 +1970,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
br = 2;
loop_depth--;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index efc2d96be13..a690003ecbd 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -70,9 +70,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
key->max_threads = 12 * 6;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
@@ -155,7 +155,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -174,7 +174,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -277,7 +277,7 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw )
grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
per_thread_scratch_space = key.total_scratch / 1024 - 1;
stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
- sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+ sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4;
/* Emit WM program relocation */
make_reloc(&reloc[nr_reloc++],
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
index 36c04a31655..1abe869f1ae 100644
--- a/src/gallium/drivers/i965/intel_decode.c
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -42,10 +42,11 @@
#include "util/u_memory.h"
#include "util/u_string.h"
+
#include "intel_decode.h"
+#include "brw_reg.h"
/*#include "intel_chipset.h"*/
-#define IS_965(x) 1 /* XXX */
#define IS_9XX(x) 1 /* XXX */
#define BUFFER_FAIL(_count, _len, _name) do { \
@@ -99,10 +100,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
} opcodes_mi[] = {
{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
- { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
{ 0x00, 0, 1, 1, "MI_NOOP" },
@@ -116,6 +118,11 @@ decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
};
+ switch ((data[0] & 0x1f800000) >> 23) {
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n");
+ return -1;
+ }
for (opcode = 0; opcode < Elements(opcodes_mi); opcode++) {
if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
@@ -308,9 +315,13 @@ decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
static int
decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
- switch ((data[0] & 0x00f80000) >> 19) {
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
case 0x11:
- instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n");
+ instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
return 1;
case 0x10:
instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
@@ -326,7 +337,8 @@ decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
(*failures)++;
return 1;
}
@@ -384,7 +396,7 @@ i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask
sprintf(dstname, "oD%s%s", dstmask, sat);
break;
case 6:
- if (dst_nr > 2)
+ if (dst_nr > 3)
fprintf(out, "bad destination reg U%d\n", dst_nr);
sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
break;
@@ -455,7 +467,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
break;
case 6:
sprintf(name, "U%d", src_nr);
- if (src_nr > 2)
+ if (src_nr > 3)
fprintf(out, "bad src reg %s\n", name);
break;
default:
@@ -800,10 +812,14 @@ i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
}
static int
-decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+decode_3d_1d(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ int *failures)
{
- unsigned int len, i, c, opcode, word, map, sampler, instr;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
char *format;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -814,7 +830,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
} opcodes_3d_1d[] = {
{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
- { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" },
{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
@@ -822,7 +838,6 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
- { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
@@ -834,9 +849,11 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
- };
+ }, *opcode_3d_1d;
- switch ((data[0] & 0x00ff0000) >> 16) {
+ opcode = (data[0] & 0x00ff0000) >> 16;
+
+ switch (opcode) {
case 0x07:
/* This instruction is unusual. A 0 length means just 1 DWORD instead of
* 2. The 0 length is specified in one place to be unsupported, but
@@ -891,26 +908,56 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
- for (word = 0; word <= 7; word++) {
+ for (word = 0; word <= 8; word++) {
if (data[0] & (1 << (4 + word))) {
if (i >= count)
BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
/* save vertex state for decode */
- if (word == 2) {
- saved_s2_set = 1;
- saved_s2 = data[i];
- }
- if (word == 4) {
- saved_s4_set = 1;
- saved_s4 = data[i];
+ if (IS_9XX(devid)) {
+ if (word == 2) {
+ saved_s2_set = 1;
+ saved_s2 = data[i];
+ }
+ if (word == 4) {
+ saved_s4_set = 1;
+ saved_s4 = data[i];
+ }
}
instr_out(data, hw_offset, i++, "S%d\n", word);
}
}
if (len != i) {
- fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x03:
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2");
+
+ if (word == 6)
+ instr_out(data, hw_offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ instr_out(data, hw_offset, i++, "TB%dC\n", word - 7);
+ instr_out(data, hw_offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11);
+ }
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
(*failures)++;
}
return len;
@@ -922,11 +969,27 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
if (i + 3 >= count)
BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
instr_out(data, hw_offset, i++, "map %d MS2\n", map);
- instr_out(data, hw_offset, i++, "map %d MS3\n", map);
- instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling);
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch);
}
}
if (len != i) {
@@ -982,8 +1045,8 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
return len;
case 0x01:
- if (i830)
- break;
+ if (!IS_9XX(devid))
+ break;
instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
instr_out(data, hw_offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
@@ -1034,30 +1097,61 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
format,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO");
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n");
+ instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ instr_out(data, hw_offset, 2, "address\n");
+ return len;
+ }
}
- for (opcode = 0; opcode < Elements(opcodes_3d_1d); opcode++) {
- if (opcodes_3d_1d[opcode].i830_only && !i830)
+ for (idx = 0; idx < Elements(opcodes_3d_1d); idx++)
+ {
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (opcode_3d_1d->i830_only && IS_9XX(devid))
continue;
- if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
- if (opcodes_3d_1d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name);
+ if (opcode_3d_1d->max_len > 1) {
len = (data[0] & 0x0000ffff) + 2;
- if (len < opcodes_3d_1d[opcode].min_len ||
- len > opcodes_3d_1d[opcode].max_len)
+ if (len < opcode_3d_1d->min_len ||
+ len > opcode_3d_1d->max_len)
{
fprintf(out, "Bad count in %s\n",
- opcodes_3d_1d[opcode].name);
+ opcode_3d_1d->name);
(*failures)++;
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d_1d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
@@ -1065,7 +1159,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures,
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1075,8 +1169,10 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
int *failures)
{
char immediate = (data[0] & (1 << 23)) == 0;
- unsigned int len, i;
+ unsigned int len, i, ret;
char *primtype;
+ int original_s2 = saved_s2;
+ int original_s4 = saved_s4;
switch ((data[0] >> 18) & 0xf) {
case 0x0: primtype = "TRILIST"; break;
@@ -1089,7 +1185,7 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
case 0x7: primtype = "RECTLIST"; break;
case 0x8: primtype = "POINTLIST"; break;
case 0x9: primtype = "DIB"; break;
- case 0xa: primtype = "CLEAR_RECT"; break;
+ case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break;
default: primtype = "unknown"; break;
}
@@ -1193,6 +1289,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
vertex++;
}
}
+
+ ret = len;
} else {
/* indirect vertices */
len = data[0] & 0x0000ffff; /* index count */
@@ -1210,13 +1308,15 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
if ((data[i] & 0xffff) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: (terminator)\n");
- return i;
+ ret = i;
+ goto out;
} else if ((data[i] >> 16) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: 0x%04x, "
"(terminator)\n",
data[i] & 0xffff);
- return i;
+ ret = i;
+ goto out;
} else {
instr_out(data, hw_offset, i,
" indices: 0x%04x, 0x%04x\n",
@@ -1226,7 +1326,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
fprintf(out,
"3DPRIMITIVE: no terminator found in index buffer\n");
(*failures)++;
- return count;
+ ret = count;
+ goto out;
} else {
/* fixed size vertex index buffer */
for (i = 0; i < len; i += 2) {
@@ -1241,7 +1342,8 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
}
}
}
- return (len + 1) / 2 + 1;
+ ret = (len + 1) / 2 + 1;
+ goto out;
} else {
/* sequential vertex access */
if (count < 2)
@@ -1250,17 +1352,22 @@ decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
"3DPRIMITIVE sequential indirect %s, %d starting from "
"%d\n", primtype, len, data[1] & 0xffff);
instr_out(data, hw_offset, 1, " start\n");
- return 2;
+ ret = 2;
+ goto out;
}
}
- return len;
+out:
+ saved_s2 = original_s2;
+ saved_s4 = original_s4;
+ return ret;
}
static int
-decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ uint32_t opcode;
+ unsigned int idx;
struct {
uint32_t opcode;
@@ -1277,41 +1384,44 @@ decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 0);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if (opcode == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1401,12 +1511,87 @@ get_965_prim_type(uint32_t data)
default: return "fail";
}
}
+static int
+i965_decode_urb_fence(const uint32_t *data, uint32_t hw_offset, int len, int count,
+ int *failures)
+{
+ uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence;
+
+ if (len != 3)
+ fprintf(out, "Bad count in URB_FENCE\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "URB_FENCE");
+
+ vs_fence = data[1] & 0x3ff;
+ gs_fence = (data[1] >> 10) & 0x3ff;
+ clip_fence = (data[1] >> 20) & 0x3ff;
+ sf_fence = data[2] & 0x3ff;
+ vfe_fence = (data[2] >> 10) & 0x3ff;
+ cs_fence = (data[2] >> 20) & 0x7ff;
+
+ instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ instr_out(data, hw_offset, 1,
+ "vs fence: %d, clip_fence: %d, gs_fence: %d\n",
+ vs_fence, clip_fence, gs_fence);
+ instr_out(data, hw_offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ sf_fence, vfe_fence, cs_fence);
+ if (gs_fence < vs_fence)
+ fprintf(out, "gs fence < vs fence!\n");
+ if (clip_fence < gs_fence)
+ fprintf(out, "clip fence < gs fence!\n");
+ if (sf_fence < clip_fence)
+ fprintf(out, "sf fence < clip fence!\n");
+ if (cs_fence < sf_fence)
+ fprintf(out, "cs fence < sf fence!\n");
+
+ return len;
+}
+
+static void
+state_base_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ instr_out(data, hw_offset, index, "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ } else {
+ instr_out(data, hw_offset, index, "%s state base not updated\n",
+ name);
+ }
+}
+
+static void
+state_max_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ if (data[index] == 1) {
+ instr_out(data, hw_offset, index,
+ "%s state upper bound disabled\n", name);
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ }
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound not updated\n",
+ name);
+ }
+}
static int
-decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode, len;
- int i;
+ uint32_t opcode;
+ unsigned int idx, len;
+ int i, sba_len;
+ char *desc1 = NULL;
struct {
uint32_t opcode;
@@ -1435,51 +1620,78 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
- };
+ { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+ { 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+ { 0x7811, 7, 7, "3DSTATE_GS_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7813, 20, 20, "3DSTATE_SF_STATE" },
+ { 0x7814, 9, 9, "3DSTATE_WM_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ }, *opcode_3d;
len = (data[0] & 0x0000ffff) + 2;
- switch ((data[0] & 0xffff0000) >> 16) {
+ opcode = (data[0] & 0xffff0000) >> 16;
+ switch (opcode) {
+ case 0x6000:
+ len = (data[0] & 0x000000ff) + 2;
+ return i965_decode_urb_fence(data, hw_offset, len, count, failures);
+ case 0x6001:
+ instr_out(data, hw_offset, 0, "CS_URB_STATE\n");
+ instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ len = (data[0] & 0x000000ff) + 2;
+ instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
case 0x6101:
- if (len != 6)
+ if (IS_GEN6(devid))
+ sba_len = 10;
+ else if (IS_IRONLAKE(devid))
+ sba_len = 8;
+ else
+ sba_len = 6;
+ if (len != sba_len)
fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
- if (count < 6)
+ if (len != sba_len)
BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+ i = 0;
instr_out(data, hw_offset, 0,
"STATE_BASE_ADDRESS\n");
-
- if (data[1] & 1) {
- instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
- data[1] & ~1);
- } else
- instr_out(data, hw_offset, 1, "General state not updated\n");
-
- if (data[2] & 1) {
- instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
- data[2] & ~1);
- } else
- instr_out(data, hw_offset, 2, "Surface state not updated\n");
-
- if (data[3] & 1) {
- instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
- data[3] & ~1);
- } else
- instr_out(data, hw_offset, 3, "Indirect state not updated\n");
-
- if (data[4] & 1) {
- instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
- data[4] & ~1);
- } else
- instr_out(data, hw_offset, 4, "General state not updated\n");
-
- if (data[5] & 1) {
- instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
- data[5] & ~1);
- } else
- instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+ i++;
+
+ state_base_out(data, hw_offset, i++, "general");
+ state_base_out(data, hw_offset, i++, "surface");
+ if (IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "dynamic");
+ state_base_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "instruction");
+
+ state_max_out(data, hw_offset, i++, "general");
+ if (IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "dynamic");
+ state_max_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "instruction");
return len;
case 0x7800:
@@ -1498,18 +1710,33 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
instr_out(data, hw_offset, 6, "CC state\n");
return len;
case 0x7801:
- if (len != 6)
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 6 && len != 4)
fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
- if (count < 6)
- BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ if (len == 6) {
+ if (count < 6)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "Clip binding table\n");
+ instr_out(data, hw_offset, 4, "SF binding table\n");
+ instr_out(data, hw_offset, 5, "WM binding table\n");
+ } else {
+ if (count < 4)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
- instr_out(data, hw_offset, 0,
- "3DSTATE_BINDING_TABLE_POINTERS\n");
- instr_out(data, hw_offset, 1, "VS binding table\n");
- instr_out(data, hw_offset, 2, "GS binding table\n");
- instr_out(data, hw_offset, 3, "Clip binding table\n");
- instr_out(data, hw_offset, 4, "SF binding table\n");
- instr_out(data, hw_offset, 5, "WM binding table\n");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
+ "GS mod %d, PS mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 10)) != 0);
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "WM binding table\n");
+ }
return len;
@@ -1560,6 +1787,18 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
}
return len;
+ case 0x780d:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "clip\n");
+ instr_out(data, hw_offset, 2, "sf\n");
+ instr_out(data, hw_offset, 3, "cc\n");
+ return len;
+
case 0x780a:
len = (data[0] & 0xff) + 2;
if (len != 3)
@@ -1592,7 +1831,7 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
case 0x7905:
- if (len != 5 && len != 6)
+ if (len < 5 || len > 7)
fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
if (count < len)
BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
@@ -1609,9 +1848,36 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
instr_out(data, hw_offset, 4, "volume depth\n");
- if (len == 6)
+ if (len >= 6)
instr_out(data, hw_offset, 5, "\n");
+ if (len >= 7)
+ instr_out(data, hw_offset, 6, "render target view extent\n");
+
+ return len;
+ case 0x7a00:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in PIPE_CONTROL\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "PIPE_CONTROL");
+
+ switch ((data[0] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ instr_out(data, hw_offset, 0,
+ "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
+ "%sinst flush\n",
+ desc1,
+ data[0] & (1 << 13) ? "" : "no ",
+ data[0] & (1 << 12) ? "" : "no ",
+ data[0] & (1 << 11) ? "" : "no ");
+ instr_out(data, hw_offset, 1, "destination address\n");
+ instr_out(data, hw_offset, 2, "immediate dword low\n");
+ instr_out(data, hw_offset, 3, "immediate dword high\n");
return len;
case 0x7b00:
@@ -1633,39 +1899,41 @@ decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures
return len;
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) {
unsigned int i;
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
static int
-decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ unsigned int idx;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -1689,41 +1957,44 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
{ 0x16, 1, 1, "3DSTATE_MODES_4" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 1);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < Elements(opcodes_3d); opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < Elements(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1736,8 +2007,12 @@ decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failure
* \param hw_offset hardware address for the buffer
*/
int
-intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+intel_decode(const uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ uint32_t ignore_end_of_batchbuffer)
{
+ int ret;
int index = 0;
int failures = 0;
@@ -1746,8 +2021,23 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
while (index < count) {
switch ((data[index] & 0xe0000000) >> 29) {
case 0x0:
- index += decode_mi(data + index, count - index,
+ ret = decode_mi(data + index, count - index,
hw_offset + index * 4, &failures);
+
+ /* If MI_BATCHBUFFER_END happened, then dump the rest of the
+ * output in case we some day want it in debugging, but don't
+ * decode it since it'll just confuse in the common case.
+ */
+ if (ret == -1) {
+ if (ignore_end_of_batchbuffer) {
+ index++;
+ } else {
+ for (index = index + 1; index < count; index++) {
+ instr_out(data, hw_offset, index, "\n");
+ }
+ }
+ } else
+ index += ret;
break;
case 0x2:
index += decode_2d(data + index, count - index,
@@ -1756,13 +2046,16 @@ intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid
case 0x3:
if (IS_965(devid)) {
index += decode_3d_965(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else if (IS_9XX(devid)) {
index += decode_3d(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else {
index += decode_3d_i830(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
}
break;
default:
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
index 6201a23d6a4..7e7c108c0c6 100644
--- a/src/gallium/drivers/i965/intel_decode.h
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -27,5 +27,5 @@
#include "pipe/p_compiler.h"
-int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, uint32_t ignore_end_of_batchbuffer);
void intel_decode_context_reset(void);
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index c9c463f470f..8c9efc2f536 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -30,10 +30,16 @@
#include <util/u_inlines.h>
#include <util/u_format.h>
#include "noop_public.h"
-#include "state_tracker/sw_winsys.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)
void noop_init_state_functions(struct pipe_context *ctx);
+struct noop_pipe_screen {
+ struct pipe_screen pscreen;
+ struct pipe_screen *oscreen;
+};
+
/*
* query
*/
@@ -108,52 +114,29 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen,
FREE(nresource);
return NULL;
}
-#if 0
- if (nresource->base.bind & (PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) {
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- unsigned stride;
-
- nresource->dt = winsys->displaytarget_create(winsys, nresource->base.bind,
- nresource->base.format,
- nresource->base.width0,
- nresource->base.height0,
- 16, &stride);
- }
-#endif
return &nresource->base;
}
-static struct pipe_resource *noop_resource_from_handle(struct pipe_screen * screen,
+static struct pipe_resource *noop_resource_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
- struct winsys_handle *whandle)
+ struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource;
- struct sw_displaytarget *dt;
- unsigned stride;
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+ struct pipe_resource *result;
+ struct noop_resource *noop_resource;
- dt = winsys->displaytarget_from_handle(winsys, templ, whandle, &stride);
- if (dt == NULL) {
- return NULL;
- }
- nresource = (struct noop_resource *)noop_resource_create(screen, templ);
- nresource->dt = dt;
- return &nresource->base;
+ result = oscreen->resource_from_handle(oscreen, templ, handle);
+ noop_resource = noop_resource_create(screen, result);
+ pipe_resource_reference(&result, NULL);
+ return noop_resource;
}
static boolean noop_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
struct winsys_handle *handle)
{
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- struct noop_resource *nresource = (struct noop_resource *)resource;
-
- if (nresource->dt == NULL)
- return FALSE;
-
- return winsys->displaytarget_get_handle(winsys, nresource->dt, handle);
+ return FALSE;
}
static void noop_resource_destroy(struct pipe_screen *screen,
@@ -161,11 +144,6 @@ static void noop_resource_destroy(struct pipe_screen *screen,
{
struct noop_resource *nresource = (struct noop_resource *)resource;
- if (nresource->dt) {
- /* display target */
- struct sw_winsys *winsys = (struct sw_winsys *)screen->winsys;
- winsys->displaytarget_destroy(winsys, nresource->dt);
- }
free(nresource->data);
FREE(resource);
}
@@ -483,19 +461,30 @@ static boolean noop_is_format_supported(struct pipe_screen* screen,
static void noop_destroy_screen(struct pipe_screen *screen)
{
+ struct noop_pipe_screen *noop_screen = (struct noop_pipe_screen*)screen;
+ struct pipe_screen *oscreen = noop_screen->oscreen;
+
+ oscreen->destroy(oscreen);
FREE(screen);
}
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys)
+struct pipe_screen *noop_screen_create(struct pipe_screen *oscreen)
{
+ struct noop_pipe_screen *noop_screen;
struct pipe_screen *screen;
- screen = CALLOC_STRUCT(pipe_screen);
- if (screen == NULL) {
+ if (!debug_get_option_noop()) {
+ return oscreen;
+ }
+
+ noop_screen = CALLOC_STRUCT(noop_pipe_screen);
+ if (noop_screen == NULL) {
return NULL;
}
+ noop_screen->oscreen = oscreen;
+ screen = &noop_screen->pscreen;
- screen->winsys = (struct pipe_winsys*)winsys;
+ screen->winsys = oscreen->winsys;
screen->destroy = noop_destroy_screen;
screen->get_name = noop_get_name;
screen->get_vendor = noop_get_vendor;
diff --git a/src/gallium/drivers/noop/noop_public.h b/src/gallium/drivers/noop/noop_public.h
index 8ce82bec698..180ea597fab 100644
--- a/src/gallium/drivers/noop/noop_public.h
+++ b/src/gallium/drivers/noop/noop_public.h
@@ -23,8 +23,7 @@
#ifndef NOOP_PUBLIC_H
#define NOOP_PUBLIC_H
-struct sw_winsys;
-
-struct pipe_screen *noop_screen_create(struct sw_winsys *winsys);
+struct pipe_screen;
+struct pipe_screen *noop_screen_create(struct pipe_screen *screen);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index d97566ed7cf..b4eda0f617d 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct nv50_context *nv50 = nv50_context(pipe);
if (shader == PIPE_SHADER_VERTEX) {
- nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
nv50->dirty |= NV50_NEW_VERTPROG_CB;
} else
if (shader == PIPE_SHADER_FRAGMENT) {
- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
nv50->dirty |= NV50_NEW_FRAGPROG_CB;
- } else
- if (shader == PIPE_SHADER_GEOMETRY) {
- nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
+ } else {
+ assert(shader == PIPE_SHADER_GEOMETRY);
nv50->dirty |= NV50_NEW_GEOMPROG_CB;
}
+
+ pipe_resource_reference(&nv50->constbuf[shader], buf);
}
static void
diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
index 5c3d46d9eab..da8f9a2ab4d 100644
--- a/src/gallium/drivers/nvc0/Makefile
+++ b/src/gallium/drivers/nvc0/Makefile
@@ -28,6 +28,7 @@ C_SOURCES = \
nvc0_push.c \
nvc0_push2.c \
nvc0_fence.c \
- nvc0_mm.c
+ nvc0_mm.c \
+ nvc0_query.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
index 46c0b5889a0..c49e0ddbc52 100644
--- a/src/gallium/drivers/nvc0/SConscript
+++ b/src/gallium/drivers/nvc0/SConscript
@@ -29,7 +29,8 @@ nvc0 = env.ConvenienceLibrary(
'nvc0_push.c',
'nvc0_push2.c',
'nvc0_fence.c',
- 'nvc0_mm.c'
+ 'nvc0_mm.c',
+ 'nvc0_query.c'
])
Export('nvc0')
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 1346d999409..702e58b2e3c 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -8,15 +8,15 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- nvc0_3d.xml ( 28058 bytes, from 2010-11-26 18:05:20)
+- nvc0_3d.xml ( 30401 bytes, from 2011-01-08 18:09:11)
- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
-- nv_3ddefs.xml ( 16394 bytes, from 2010-10-09 08:27:14)
-- nv_object.xml ( 11547 bytes, from 2010-11-26 16:41:56)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
-- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
-Copyright (C) 2006-2010 by the following authors:
+Copyright (C) 2006-2011 by the following authors:
- Artur Huillet <[email protected]> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <[email protected]> (koala_br)
@@ -248,6 +248,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
+
#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
@@ -498,14 +516,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
#define NVC0_3D_TSC_FLUSH 0x00001330
-#define NVC0_3D_TSC_FLUSH_UNK0 0x00000001
-#define NVC0_3D_TSC_FLUSH_UNK1__MASK 0x03fffff0
-#define NVC0_3D_TSC_FLUSH_UNK1__SHIFT 4
+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
#define NVC0_3D_TIC_FLUSH 0x00001334
-#define NVC0_3D_TIC_FLUSH_UNK0 0x00000001
-#define NVC0_3D_TIC_FLUSH_UNK1__MASK 0x03fffff0
-#define NVC0_3D_TIC_FLUSH_UNK1__SHIFT 4
+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
#define NVC0_3D_TEX_CACHE_CTL 0x00001338
#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
@@ -630,7 +648,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
-#define NVC0_3D_SAMPLECNT_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
#define NVC0_3D_MULTISAMPLE_ZETA_ENABLE 0x00001534
@@ -960,11 +996,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
#define NVC0_3D_QUERY_GET 0x00001b0c
-#define NVC0_3D_QUERY_GET_FENCE 0x1000f010
-#define NVC0_3D_QUERY_GET_SAMPLE_COUNT 0x0100f002
-#define NVC0_3D_QUERY_GET_TFB 0x05805002
-#define NVC0_3D_QUERY_GET_GENERATED_PRIMS 0x06805002
-#define NVC0_3D_QUERY_GET_UNK00005002 0x00005002
+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NVC0_3D_QUERY_GET_INTR 0x00100000
+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
@@ -1026,6 +1081,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_3D_FIRMWARE__LEN 0x00000020
+
#define NVC0_3D_CB_SIZE 0x00002380
#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
index 06841bb19b8..f5ac6557fe9 100644
--- a/src/gallium/drivers/nvc0/nvc0_buffer.c
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -52,8 +52,7 @@ static INLINE void
release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
{
if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) {
- (*mm)->next = fence->buffers;
- fence->buffers = (*mm);
+ nvc0_fence_sched_release(fence, *mm);
} else {
nvc0_mm_free(*mm);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index b2b4fd62eeb..2118abb5d5d 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -92,6 +92,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
screen->base.channel->user_private = nvc0;
+ nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
nvc0_init_state_functions(nvc0);
nvc0_init_resource_functions(&nvc0->pipe);
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index c181f15dbd8..0f340beb35a 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -166,6 +166,9 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
boolean nvc0_program_translate(struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+
/* nvc0_shader_state.c */
void nvc0_vertprog_validate(struct nvc0_context *);
void nvc0_tctlprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
index 7c214ca9a75..9d2c48cf14d 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.c
+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
@@ -59,7 +59,8 @@ nvc0_fence_emit(struct nvc0_fence *fence)
OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
OUT_RING (chan, fence->sequence);
- OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE);
+ OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
++fence->ref;
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
index ccbb776447f..941be678586 100644
--- a/src/gallium/drivers/nvc0/nvc0_push.c
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -75,13 +75,15 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->chan->cur);
ctx->chan->cur += size;
- count -= push;
- elts += push;
+ count -= nr;
+ elts += nr;
if (nr != push) {
+ count--;
+ elts++;
BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
OUT_RING (ctx->chan, 0);
OUT_RING (ctx->chan, ctx->prim);
@@ -106,13 +108,15 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->chan->cur);
ctx->chan->cur += size;
- count -= push;
- elts += push;
+ count -= nr;
+ elts += nr;
if (nr != push) {
+ count--;
+ elts++;
BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
OUT_RING (ctx->chan, 0);
OUT_RING (ctx->chan, ctx->prim);
@@ -137,13 +141,15 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
- ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->chan->cur);
ctx->chan->cur += size;
- count -= push;
- elts += push;
+ count -= nr;
+ elts += nr;
if (nr != push) {
+ count--;
+ elts++;
BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
OUT_RING (ctx->chan, 0);
OUT_RING (ctx->chan, ctx->prim);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
new file mode 100644
index 00000000000..cc83fbe771c
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0_context.h"
+#include "nouveau/nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+struct nvc0_query {
+ uint32_t *data;
+ uint32_t type;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * 16 */
+ boolean ready;
+ boolean is64bit;
+ struct nvc0_mm_allocation *mm;
+};
+
+#define NVC0_QUERY_ALLOC_SPACE 128
+
+static INLINE struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+static boolean
+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->mm) {
+ if (q->ready)
+ nvc0_mm_free(q->mm);
+ else
+ nvc0_fence_sched_release(screen->fence.current, q->mm);
+ }
+ }
+ if (size) {
+ q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
+ NOUVEAU_BO_NOSYNC);
+ if (ret) {
+ nvc0_query_allocate(nvc0, q, 0);
+ return FALSE;
+ }
+ q->data = q->bo->map;
+ nouveau_bo_unmap(q->bo);
+ }
+ return TRUE;
+}
+
+static void
+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
+ FREE(nvc0_query(pq));
+}
+
+static struct pipe_query *
+nvc0_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q;
+
+ q = CALLOC_STRUCT(nvc0_query);
+ if (!q)
+ return NULL;
+
+ if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ type == PIPE_QUERY_SO_STATISTICS);
+ q->type = type;
+
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset -= 16;
+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ }
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, get);
+}
+
+static void
+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
+ NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
+}
+
+static void
+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ const int index = 0; /* for multiple vertex streams */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nvc0_query_get(chan, q, 0, 0x0100f002);
+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ OUT_RING (chan, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(chan, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_query_get(chan, q, 0, 0x1000f010);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static INLINE boolean
+nvc0_query_ready(struct nvc0_query *q)
+{
+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+}
+
+static INLINE boolean
+nvc0_query_wait(struct nvc0_query *q)
+{
+ int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ if (ret)
+ return FALSE;
+ nouveau_bo_unmap(q->bo);
+ return TRUE;
+}
+
+static boolean
+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, void *result)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ uint64_t *res64 = result;
+ uint32_t *res32 = result;
+ boolean *res8 = result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (q->type == PIPE_QUERY_GPU_FINISHED) {
+ res8[0] = nvc0_query_ready(q);
+ return TRUE;
+ }
+
+ if (!q->ready) /* update ? */
+ q->ready = nvc0_query_ready(q);
+ if (!q->ready) {
+ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
+ if (!wait) {
+ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
+ FIRE_RING(chan);
+ return FALSE;
+ }
+ if (!nvc0_query_wait(q))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res32[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0];
+ res64[1] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
+ res64[0] = 1000000000;
+ res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+nvc0_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq, uint mode)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_query *q;
+
+ if (!pq) {
+ IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nvc0_query(pq);
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, q->sequence);
+ OUT_RING (chan, 0x00001001);
+ }
+
+ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+}
+
+void
+nvc0_init_query_functions(struct nvc0_context *nvc0)
+{
+ nvc0->pipe.create_query = nvc0_query_create;
+ nvc0->pipe.destroy_query = nvc0_query_destroy;
+ nvc0->pipe.begin_query = nvc0_query_begin;
+ nvc0->pipe.end_query = nvc0_query_end;
+ nvc0->pipe.get_query_result = nvc0_query_result;
+ nvc0->pipe.render_condition = nvc0_render_condition;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index e149b907314..c191790ab14 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -92,9 +92,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 8;
+ case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_OCCLUSION_QUERY:
return 1;
- case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_STREAM_OUTPUT:
return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 5c6482be96b..1fac142e2be 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -86,6 +86,13 @@ struct nvc0_mm_allocation {
uint32_t offset;
};
+static INLINE void
+nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm)
+{
+ mm->next = nf->buffers;
+ nf->buffers = mm;
+}
+
extern struct nvc0_mman *
nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index 881f0655506..fd7a7942cb8 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -584,6 +584,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (info->primitive_restart) {
BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
OUT_RING (chan, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
}
nvc0_draw_elements(nvc0, shorten,
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
index 54619037d82..f3dcb205c61 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- nvfx->constbuf[shader] = buf;
+ pipe_resource_reference(&nvfx->constbuf[shader], buf);
nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0;
if (shader == PIPE_SHADER_VERTEX) {
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 017db48485b..a43e83c0d36 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -179,6 +179,12 @@ static void r300_clear(struct pipe_context* pipe,
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
@@ -274,6 +280,12 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_render_target(r300->blitter, dst, rgba,
dstx, dsty, width, height);
@@ -291,6 +303,12 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
+ /* Decompress zbuffers that are bound as textures. If we didn't flush here,
+ * it would happen inside the blitter when updating derived state,
+ * causing a blitter operation to be called from inside the blitter,
+ * which would overwrite saved states and they would never get restored. */
+ r300_flush_depth_textures(r300);
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
@@ -298,10 +316,10 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
}
/* Flush a depth stencil buffer. */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer)
+static void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned level,
+ unsigned layer)
{
struct r300_context *r300 = r300_context(pipe);
struct pipe_surface *dstsurf, surf_tmpl;
@@ -320,6 +338,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe,
dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
r300->z_decomp_rd = TRUE;
+
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
r300_blitter_end(r300);
@@ -329,6 +348,39 @@ void r300_flush_depth_stencil(struct pipe_context *pipe,
pipe_surface_reference(&dstsurf, NULL);
}
+/* We can't use compressed zbuffers as samplers. */
+void r300_flush_depth_textures(struct r300_context *r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned i, level;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+
+ if (r300->z_decomp_rd)
+ return;
+
+ for (i = 0; i < count; i++)
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ struct pipe_resource *tex = state->sampler_views[i]->base.texture;
+
+ if (tex->target == PIPE_TEXTURE_3D ||
+ tex->target == PIPE_TEXTURE_CUBE)
+ continue;
+
+ /* Ignore non-depth textures.
+ * Also ignore reinterpreted depth textures, e.g. resource_copy. */
+ if (!util_format_is_depth_or_stencil(tex->format))
+ continue;
+
+ for (level = 0; level <= tex->last_level; level++)
+ if (r300_texture(tex)->zmask_in_use[level]) {
+ /* We don't handle 3D textures and cubemaps yet. */
+ r300_flush_depth_stencil(&r300->context, tex, level, 0);
+ }
+ }
+}
+
/* Copy a block of pixels from one surface to another using HW. */
static void r300_hw_copy_region(struct pipe_context* pipe,
struct pipe_resource *dst,
@@ -360,6 +412,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
enum pipe_format old_format = dst->format;
enum pipe_format new_format = old_format;
boolean is_depth;
+
if (!pipe->screen->is_format_supported(pipe->screen,
old_format, src->target,
src->nr_samples,
@@ -390,6 +443,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
if (is_depth) {
r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
}
+
if (old_format != new_format) {
r300_texture_reinterpret_format(pipe->screen,
dst, new_format);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 1a14d2b79e5..e09cf87f733 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -656,6 +656,22 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
return (struct r300_fragment_shader*)r300->fs.state;
}
+static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+ struct r300_atom *atom)
+{
+ atom->dirty = TRUE;
+
+ if (!r300->first_dirty) {
+ r300->first_dirty = atom;
+ r300->last_dirty = atom+1;
+ } else {
+ if (atom < r300->first_dirty)
+ r300->first_dirty = atom;
+ else if (atom+1 > r300->last_dirty)
+ r300->last_dirty = atom+1;
+ }
+}
+
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
@@ -672,10 +688,7 @@ void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
/* r300_blit.c */
-void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer);
+void r300_flush_depth_textures(struct r300_context *r300);
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
@@ -710,21 +723,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change);
void r300_mark_fs_code_dirty(struct r300_context *r300);
-static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
- struct r300_atom *atom)
-{
- atom->dirty = TRUE;
-
- if (!r300->first_dirty) {
- r300->first_dirty = atom;
- r300->last_dirty = atom+1;
- } else {
- if (atom < r300->first_dirty)
- r300->first_dirty = atom;
- if (atom+1 > r300->last_dirty)
- r300->last_dirty = atom+1;
- }
-}
+/* r300_state_derived.c */
+void r300_update_derived_state(struct r300_context* r300);
/* r300_debug.c */
void r500_dump_rs_block(struct r300_rs_block *rs);
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 52031dd97b5..d6aa90bd053 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -41,6 +41,7 @@ static const struct debug_named_value debug_options[] = {
{ "fb", DBG_FB, "Log framebuffer" },
{ "cbzb", DBG_CBZB, "Log fast color clear info" },
{ "hyperz", DBG_HYPERZ, "Log HyperZ info" },
+ { "upload", DBG_UPLOAD, "Log user buffer upload info" },
{ "scissor", DBG_SCISSOR, "Log scissor info" },
{ "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
{ "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index e660ca68f1b..b35822c82f8 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -39,7 +39,6 @@
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
-#include "r300_state_derived.h"
#include <limits.h>
@@ -569,8 +568,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
&start, count);
- r300_update_derived_state(r300);
-
/* Fallback for misaligned ushort indices. */
if (indexSize == 2 && (start & 1) &&
!r300_is_user_buffer(indexBuffer)) {
@@ -643,8 +640,6 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
- r300_update_derived_state(r300);
-
if (immd_is_good_idea(r300, count)) {
r300_emit_draw_arrays_immediate(r300, mode, start, count);
} else {
@@ -720,6 +715,8 @@ static void r300_draw_vbo(struct pipe_context* pipe,
return;
}
+ r300_update_derived_state(r300);
+
/* Set up the fallback for an incompatible vertex layout if needed. */
if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
r300_begin_vertex_translate(r300, real_min_index, real_max_index);
@@ -737,6 +734,8 @@ static void r300_draw_vbo(struct pipe_context* pipe,
min_index = MAX2(min_index, info->start);
max_index = MIN2(max_index, info->start + count - 1);
+ r300_update_derived_state(r300);
+
/* Set up the fallback for an incompatible vertex layout if needed. */
if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
r300_begin_vertex_translate(r300, min_index, max_index);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 5847fe1ffc8..752f53b7579 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -93,6 +93,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_CBZB (1 << 11)
#define DBG_HYPERZ (1 << 12)
#define DBG_SCISSOR (1 << 13)
+#define DBG_UPLOAD (1 << 14)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index e3cf45479fd..cc3c1d7687e 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -87,13 +87,14 @@ void r300_upload_user_buffers(struct r300_context *r300,
int i, nr = r300->velems->count;
unsigned count = max_index + 1 - min_index;
boolean flushed;
+ boolean uploaded[16] = {0};
for (i = 0; i < nr; i++) {
unsigned index = r300->velems->velem[i].vertex_buffer_index;
struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index];
struct r300_buffer *userbuf = r300_buffer(vb->buffer);
- if (userbuf && userbuf->user_buffer) {
+ if (userbuf && userbuf->user_buffer && !uploaded[index]) {
unsigned first, size;
if (vb->stride) {
@@ -104,6 +105,12 @@ void r300_upload_user_buffers(struct r300_context *r300,
size = r300->velems->hw_format_size[i];
}
+ DBG(r300, DBG_UPLOAD,
+ "Uploading %i bytes, index: %i, buffer: %p, userptr: %p "
+ "offset: %i, stride: %i.\n",
+ size, index, userbuf, userbuf->user_buffer,
+ vb->buffer_offset, vb->stride);
+
u_upload_data(r300->upload_vb, first, size,
userbuf->user_buffer + first,
&vb->buffer_offset,
@@ -118,10 +125,12 @@ void r300_upload_user_buffers(struct r300_context *r300,
r300->upload_vb_validated = FALSE;
r300->validate_buffers = TRUE;
}
+ uploaded[index] = TRUE;
} else {
assert(r300->valid_vertex_buffer[index]);
}
}
+ DBG(r300, DBG_UPLOAD, "-------\n");
}
static void r300_buffer_destroy(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index d3985c11aa8..95be7849f8f 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -32,7 +32,6 @@
#include "r300_hyperz.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
-#include "r300_state_derived.h"
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
@@ -863,39 +862,6 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
-/* We can't use compressed zbuffers as samplers. */
-static void r300_flush_depth_textures(struct r300_context *r300)
-{
- struct r300_textures_state *state =
- (struct r300_textures_state*)r300->textures_state.state;
- unsigned i, level;
- unsigned count = MIN2(state->sampler_view_count,
- state->sampler_state_count);
-
- if (r300->z_decomp_rd)
- return;
-
- for (i = 0; i < count; i++)
- if (state->sampler_views[i] && state->sampler_states[i]) {
- struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
- if (tex->target == PIPE_TEXTURE_3D ||
- tex->target == PIPE_TEXTURE_CUBE)
- continue;
-
- /* Ignore non-depth textures.
- * Also ignore reinterpreted depth textures, e.g. resource_copy. */
- if (!util_format_is_depth_or_stencil(tex->format))
- continue;
-
- for (level = 0; level <= tex->last_level; level++)
- if (r300_texture(tex)->zmask_in_use[level]) {
- /* We don't handle 3D textures and cubemaps yet. */
- r300_flush_depth_stencil(&r300->context, tex, level, 0);
- }
- }
-}
-
void r300_update_derived_state(struct r300_context* r300)
{
r300_flush_depth_textures(r300);
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
deleted file mode 100644
index 71a4a47b003..00000000000
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_STATE_DERIVED_H
-#define R300_STATE_DERIVED_H
-
-struct r300_context;
-
-void r300_update_derived_state(struct r300_context* r300);
-
-#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 335f282b06f..a852bef6156 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -248,10 +248,7 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
- unsigned fence;
struct list_head fenced_bo;
- unsigned *cfence;
- struct r600_bo *fence_bo;
};
struct r600_draw {