summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorBrian Paul <[email protected]>2009-04-14 11:08:42 -0600
committerBrian Paul <[email protected]>2009-04-14 11:08:42 -0600
commitcafea7528052624c8d3e4cd1c5b26a61bf04d1d0 (patch)
tree114d80c8261fc80eb615c70095d76f8f84d53baa /src/mesa/drivers
parent43c7ffaea635f949fd4803c4f594cf53e4b98f24 (diff)
i965: checkpoint commit: VS constant buffers
Hook up a constant buffer, binding table, etc for the VS unit. This will allow using large constant buffers with vertex shaders. The new code is disabled at this time (use_const_buffer=FALSE).
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h30
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h11
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c66
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c229
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c205
11 files changed, 477 insertions, 91 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6a9252d0375..4c2d3af8ae9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -141,7 +141,8 @@ struct brw_context;
#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
#define BRW_NEW_DEPTH_BUFFER 0x20000
-#define BRW_NEW_NR_SURFACES 0x40000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -245,20 +246,30 @@ struct brw_vs_ouput_sizes {
#define BRW_MAX_TEX_UNIT 16
/**
- * Size of our surface binding table.
+ * Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
-#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
- * to surface binding table indexes.
+ * to surface binding table indexes, for WM.
*/
#define SURF_INDEX_DRAW(d) (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS + 0)
-#define SURF_INDEX_VERT_CONST_BUFFER (MAX_DRAW_BUFFERS + 1)
-#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 2 + t)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
enum brw_cache_id {
@@ -566,6 +577,11 @@ struct brw_context
dri_bo *prog_bo;
dri_bo *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ dri_bo *bind_bo;
+ dri_bo *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
} vs;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 94bf2c0d67a..dfab14aa740 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -357,6 +357,7 @@ update_constant_buffer(struct brw_context *brw,
}
+/** Copy current vertex program's parameters into the constant buffer */
static void
update_vertex_constant_buffer(struct brw_context *brw)
{
@@ -366,6 +367,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
}
+/** Copy current fragment program's parameters into the constant buffer */
static void
update_fragment_constant_buffer(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index d05f2e6c410..e492ce162ca 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -862,9 +862,18 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
- GLuint scratch_offset,
+ GLuint location,
GLuint bind_table_index );
+/* Vertex-shader variant of brw_dp_READ_4() that takes an extra src register.
+ * The current implementation does not use the src or relAddr arguments.
+ * XXX this function is temporary - merge with brw_dp_READ_4() above.
+ */
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src,
+ GLuint msg_reg_nr,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index ec4d7fa76ff..bb7ea5c0492 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p,
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
- * Scratch offset should be a multiple of 16.
+ * Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
* If relAddr is true, we'll do an indirect fetch using the address register.
*/
@@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
- GLuint scratch_offset,
+ GLuint location,
GLuint bind_table_index )
{
{
@@ -971,7 +971,7 @@ void brw_dp_READ_4( struct brw_compile *p,
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
- brw_imm_d(scratch_offset));
+ brw_imm_d(location));
brw_pop_insn_state(p);
}
@@ -1001,6 +1001,66 @@ void brw_dp_READ_4( struct brw_compile *p,
}
+/**
+ * Read one OWord from a constant buffer surface with a data port
+ * OWord block read message (vertex shader variant).
+ * XXX this function is temporary - merge with brw_dp_READ_4() above.
+ *
+ * \param p  the compile/emit context
+ * \param dest  register that receives the message response
+ * \param src  currently unused; the message header is always built in g0
+ * \param msg_reg_nr  message register number programmed into the SEND
+ * \param relAddr  currently unused; no indirect fetch is emitted here
+ * \param location  value written to the header's global offset field
+ *                  (the caller in brw_vs_emit.c passes a byte offset)
+ * \param bind_table_index  binding table slot of the surface to read
+ *
+ * Note: g0 element 2 is overwritten to form the message header; callers
+ * that still need g0 must save and restore it around this call.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      struct brw_reg src,
+                      GLuint msg_reg_nr,
+                      GLboolean relAddr,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /* set message header global offset field (reg 0, element 2) */
+      brw_MOV(p,
+              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+              brw_imm_d(location));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+      insn->header.mask_control = BRW_MASK_DISABLE;
+
+      brw_set_dest(insn, dest);
+      /* the message payload is the header held in g0 */
+      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+      brw_set_dp_read_message(insn,
+                              bind_table_index,
+                              0,  /* msg_control (0 means 1 Oword) */
+                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                              0,  /* source cache = data cache */
+                              1,  /* msg_length */
+                              1,  /* response_length (1 Oword) */
+                              0); /* eot */
+   }
+}
+
+
+
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5c94a49f60a..9bc5c35139c 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
static void prepare_binding_table_pointers(struct brw_context *brw)
{
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
brw_add_validated_bo(brw, brw->wm.bind_bo);
}
@@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- OUT_BATCH(0); /* vs */
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_RELOC(brw->wm.bind_bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- 0);
+ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 99d0e937226..d20cf78b8af 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -75,6 +75,13 @@ struct brw_vs_compile {
struct brw_reg userplane[6];
+ /** using a real constant buffer? */
+ GLboolean use_const_buffer;
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
};
void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0d6c6ab9a8a..d21f2792afb 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -38,8 +38,31 @@
#include "brw_vs.h"
+/**
+ * Allocate a temporary register.
+ * Returns a vec8 GRF at index c->last_tmp, advances c->last_tmp, and raises
+ * c->prog_data.total_grf when the new c->last_tmp exceeds it.
+ */
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
-/* Do things as simply as possible. Allocate and populate all regs
+ return tmp;
+}
+
+/**
+ * Release a temporary register obtained from get_tmp().
+ * Only the most recently allocated temporary (tmp.nr == c->last_tmp-1) is
+ * actually released; for any other register this is a no-op, so temporaries
+ * have to be released in reverse order of allocation.
+ */
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+/**
+ * Release all temporaries at once by resetting c->last_tmp to c->first_tmp.
+ */
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
@@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
GLuint i, reg = 0, mrf;
GLuint nr_params;
+#if 0
+ if (c->vp->program.Base.Parameters->NumParameters >= 6)
+ c->use_const_buffer = 1;
+ else
+#endif
+ c->use_const_buffer = GL_FALSE;
+ /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
/* r0 -- reserved as usual
*/
c->r0 = brw_vec8_grf(reg, 0);
@@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
- nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
-
- c->prog_data.curb_read_length = reg - 1;
+ if (c->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ }
+ else {
+ /* use a section of the GRF for constants */
+ nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ }
/* Allocate input regs:
*/
@@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
c->prog_data.total_grf = reg;
+ if (c->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = get_tmp(c);
+ }
+ }
+
if (INTEL_DEBUG & DEBUG_VS) {
_mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
_mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
@@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
- struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
-
- if (++c->last_tmp > c->prog_data.total_grf)
- c->prog_data.total_grf = c->last_tmp;
-
- return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
- if (tmp.nr == c->last_tmp-1)
- c->last_tmp--;
-}
-
-static void release_tmps( struct brw_vs_compile *c )
-{
- c->last_tmp = c->first_tmp;
-}
-
-
/**
* If an instruction uses a temp reg both as a src and the dest, we
* sometimes need to allocate an intermediate temporary.
@@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c,
}
+/**
+ * Return a register holding the constant referenced by the instruction's
+ * [argIndex] source operand, for the constant-buffer path.
+ *
+ * Each argument slot has a register in c->current_const[] that remembers the
+ * index of the constant it holds.  When the requested index differs, a data
+ * port read is emitted to load the constant into that register first.
+ *
+ * \param c  the VS compile state
+ * \param inst  the instruction whose source operand is being resolved
+ * \param argIndex  which source operand of the instruction
+ * \return the slot's register, restrided and with subnr cleared
+ */
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex)
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+
+   if (c->current_const[argIndex].index != src->Index) {
+      struct brw_reg src_reg = get_tmp(c);
+      struct brw_reg t = get_tmp(c);
+
+      c->current_const[argIndex].index = src->Index;
+
+      /* The read below writes its message header into g0, so save g0 */
+      brw_MOV(p, t, brw_vec8_grf(0, 0));
+
+      /* need to fetch the constant now */
+      brw_dp_READ_4_vs(p,
+                       c->current_const[argIndex].reg, /* writeback dest */
+                       src_reg,                        /* src reg */
+                       1,                              /* msg_reg */
+                       src->RelAddr,                   /* relative indexing? */
+                       16 * src->Index,                /* byte offset */
+                       SURF_INDEX_VERT_CONST_BUFFER    /* binding table index */
+                       );
+
+      /* ...and restore g0 afterwards */
+      brw_MOV(p, brw_vec8_grf(0, 0), t);
+
+      /* release_tmp() only frees the most recently allocated temporary,
+       * so release in reverse order of allocation or src_reg is leaked.
+       */
+      release_tmp(c, t);
+      release_tmp(c, src_reg);
+   }
+
+   /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
+   const_reg = c->current_const[argIndex].reg;
+   const_reg = stride(const_reg, 0, 4, 0);
+   const_reg.subnr = 0;
+
+   return const_reg;
+}
+
+
+
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
gl_register_file file,
GLuint index )
{
-
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ *
+ * Temporaries, inputs, outputs and the address register are looked up in
+ * c->regs[].  State vars, constants and uniforms come from get_constant()
+ * when c->use_const_buffer is set, otherwise from
+ * c->regs[PROGRAM_STATE_VAR][].  An undefined source yields the null
+ * register; any other register file asserts and yields the null register.
+ *
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ if (c->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ /* register files that are not expected as a source here */
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c,
}
-/* Will return mangled results for SWZ op. The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- struct prog_src_register *src )
+ const struct prog_instruction *inst,
+ GLuint argIndex )
{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr)
+ if (src->RelAddr) {
+ /* XXX fix */
reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- else
- reg = get_reg(c, src->File, src->Index);
+ }
+ else {
+ reg = get_src_reg(c, inst, argIndex);
+ }
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
}
+/**
+ * Get brw register for the given program dest register.
+ */
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
- struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
reg.dw1.bits.writemask = dst.WriteMask;
@@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
- struct prog_src_register src )
+ const struct prog_instruction *inst)
{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
@@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src.RelAddr)
arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
else
- arg0 = get_reg(c, src.File, src.Index);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
- args[i] = get_arg(c, src);
+ args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
- emit_swz(c, dst, inst->SrcReg[0] );
+ emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 1a63766ea1f..3d295388437 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -44,6 +44,8 @@ struct brw_vs_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
};
static void
@@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
key->nr_urb_entries = brw->urb.nr_vs_entries;
key->urb_size = brw->urb.vsize;
+ /* BRW_NEW_NR_VS_SURFACES */
+ key->nr_surfaces = brw->vs.nr_surfaces;
+
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
/* Note that we read in the userclip planes as well, hence
@@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 960bbb311e3..ba03afd6c13 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
+ dri_bo_release(&brw->vs.bind_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 58fa6aaf8f9..67b41173fb2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_SURFACES),
+ BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 095759f3a26..ce5dbb334b8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
}
}
-struct brw_wm_surface_key {
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
GLenum target, depthmode;
dri_bo *bo;
GLint format, internal_format;
@@ -187,6 +191,7 @@ struct brw_wm_surface_key {
GLuint offset;
};
+
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
- struct brw_wm_surface_key *key )
+ struct brw_surface_key *key )
{
struct brw_surface_state surf;
dri_bo *bo;
@@ -287,7 +292,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
- struct brw_wm_surface_key key;
+ struct brw_surface_key key;
const GLuint surf = SURF_INDEX_TEXTURE(unit);
memset(&key, 0, sizeof(key));
@@ -328,12 +333,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
/**
- * Create the constant buffer surface. Fragment shader constanst will be
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
static dri_bo *
brw_create_constant_surface( struct brw_context *brw,
- struct brw_wm_surface_key *key )
+ struct brw_surface_key *key )
{
const GLint w = key->width - 1;
struct brw_surface_state surf;
@@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
- /* This is ok for all textures with channel width 8bit or less:
- */
assert(key->bo);
if (key->bo)
surf.ss1.base_addr = key->bo->offset; /* reloc */
@@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
- surf.ss3.pitch = (key->pitch * key->cpp) - 1;
- brw_set_surface_tiling(&surf, key->tiling);
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
key, sizeof(*key),
@@ -379,17 +382,17 @@ brw_create_constant_surface( struct brw_context *brw,
/**
- * Update the surface state for a constant buffer.
+ * Update the surface state for a WM constant buffer.
* The constant buffer will be (re)allocated here if needed.
*/
static dri_bo *
-brw_update_constant_surface( GLcontext *ctx,
- GLuint surf,
- dri_bo *const_buffer,
- const struct gl_program_parameter_list *params)
+brw_update_wm_constant_surface( GLcontext *ctx,
+ GLuint surf,
+ dri_bo *const_buffer,
+ const struct gl_program_parameter_list *params)
{
struct brw_context *brw = brw_context(ctx);
- struct brw_wm_surface_key key;
+ struct brw_surface_key key;
struct intel_context *intel = &brw->intel;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
@@ -402,7 +405,7 @@ brw_update_constant_surface( GLcontext *ctx,
/* alloc new buffer if needed */
if (!const_buffer) {
const_buffer =
- drm_intel_bo_alloc(intel->bufmgr, "vp/fp_const_buffer", size, 64);
+ drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
}
memset(&key, 0, sizeof(key));
@@ -437,6 +440,66 @@ brw_update_constant_surface( GLcontext *ctx,
/**
+ * Update the surface state for a VS constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ *
+ * \param ctx  the GL context
+ * \param surf  VS binding table slot; must be 0 (asserted)
+ * \param const_buffer  the current constant buffer, or NULL if none yet
+ * \param params  parameter list; the buffer is sized at four floats each
+ * \return the constant buffer to use from now on (the one passed in, or a
+ *         newly allocated one)
+ *
+ * Side effect: brw->vs.surf_bo[surf] is replaced with the surface state
+ * found in, or newly uploaded to, the state cache.
+ *
+ * NOTE(review): nothing in this function writes the parameter values into
+ * the buffer - presumably that happens elsewhere; confirm.
+ * NOTE(review): NumParameters == 0 gives size 0 and key.width 0, and the
+ * result of drm_intel_bo_alloc() is not checked - confirm callers and the
+ * allocator make both cases safe.
+ */
+static dri_bo *
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf,
+ dri_bo *const_buffer,
+ const struct gl_program_parameter_list *params)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct intel_context *intel = &brw->intel;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+ assert(surf == 0);
+
+ /* free old const buffer if too small */
+ if (const_buffer && const_buffer->size < size) {
+ dri_bo_unreference(const_buffer);
+ const_buffer = NULL;
+ }
+
+ /* alloc new buffer if needed */
+ if (!const_buffer) {
+ const_buffer =
+ drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
+ }
+
+ /* zero the whole key first since it is hashed/compared by its full size */
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = const_buffer;
+ key.depthmode = GL_NONE;
+ /* one element per program parameter */
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ /* 16 bytes per element, matching the 4 * sizeof(GLfloat) used for size */
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ /* drop the old surface state, then look up or create the new one */
+ dri_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+
+ return const_buffer;
+}
+
+
+/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
* usable for further buffers when doing ARB_draw_buffer support.
@@ -515,7 +578,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
/* Key size will never match key size for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
+ &key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
NULL, NULL);
@@ -544,6 +607,8 @@ brw_wm_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
@@ -603,25 +668,13 @@ static void prepare_wm_surfaces(struct brw_context *brw )
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
- /* Update surface / buffer for vertex shader constant buffer */
- {
- const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *) brw->vertex_program;
- vp->const_buffer =
- brw_update_constant_surface(ctx, surf, vp->const_buffer,
- vp->program.Base.Parameters);
-
- brw->wm.nr_surfaces = surf + 1;
- }
-
/* Update surface / buffer for fragment shader constant buffer */
{
const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
fp->const_buffer =
- brw_update_constant_surface(ctx, surf, fp->const_buffer,
+ brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
fp->program.Base.Parameters);
brw->wm.nr_surfaces = surf + 1;
@@ -655,17 +708,103 @@ static void prepare_wm_surfaces(struct brw_context *brw )
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
if (brw->wm.nr_surfaces != old_nr_surfaces)
- brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ *
+ * Looks in the state cache for a binding table matching the first
+ * brw->vs.nr_surfaces entries of brw->vs.surf_bo[].  On a miss, a new table
+ * is uploaded containing each surface's offset (0 for an empty slot) and a
+ * relocation is emitted for every slot that has a surface.
+ *
+ * \param brw  the driver context
+ * \return the binding table buffer object
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
+
+   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+                              NULL, 0,
+                              brw->vs.surf_bo, brw->vs.nr_surfaces,
+                              NULL);
+
+   if (bind_bo == NULL) {
+      /* nr_surfaces never exceeds BRW_VS_MAX_SURF (asserted above), so a
+       * fixed-size array is enough and no heap allocation can fail here.
+       */
+      uint32_t data[BRW_VS_MAX_SURF];
+      const GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
+      GLuint i;
+
+      for (i = 0; i < brw->vs.nr_surfaces; i++) {
+         if (brw->vs.surf_bo[i])
+            data[i] = brw->vs.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+      }
+
+      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+                                  NULL, 0,
+                                  brw->vs.surf_bo, brw->vs.nr_surfaces,
+                                  data, data_size,
+                                  NULL, NULL);
+
+      /* Emit binding table relocations to surface state.  Only the
+       * nr_surfaces entries that were uploaded exist in the table, so do
+       * not emit relocations past them.
+       */
+      for (i = 0; i < brw->vs.nr_surfaces; i++) {
+         if (brw->vs.surf_bo[i] != NULL) {
+            dri_bo_emit_reloc(bind_bo,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0,
+                              i * sizeof(GLuint),
+                              brw->vs.surf_bo[i]);
+         }
+      }
+   }
+
+   return bind_bo;
+}
+
+
+/**
+ * Vertex shader surfaces.  Just constant buffer for now.  Could add vertex
+ * shader textures in the future.
+ *
+ * Refreshes the VS constant buffer surface and the VS binding table, and
+ * flags BRW_NEW_NR_VS_SURFACES when the number of VS surfaces has changed
+ * (the same rule prepare_wm_surfaces() applies to the WM surface count).
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLuint old_nr_surfaces = brw->vs.nr_surfaces;
+
+   /* Update surface / buffer for vertex shader constant buffer */
+   {
+      const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
+      struct brw_vertex_program *vp =
+         (struct brw_vertex_program *) brw->vertex_program;
+      vp->const_buffer =
+         brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
+                                        vp->program.Base.Parameters);
+
+      brw->vs.nr_surfaces = surf + 1;
+   }
+
+   /* drop the previous binding table before storing the new one */
+   dri_bo_unreference(brw->vs.bind_bo);
+   brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+
+   /* the VS unit state only depends on the surface count, so only signal
+    * it when that count actually changed
+    */
+   if (brw->vs.nr_surfaces != old_nr_surfaces)
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+}
+
+
+/**
+ * Prepare hook for the brw_wm_surfaces tracked state: updates the WM
+ * surfaces first, then the VS surfaces.
+ */
+static void
+prepare_surfaces(struct brw_context *brw)
+{
+ prepare_wm_surfaces(brw);
+ prepare_vs_surfaces(brw);
+}
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
+ .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .prepare = prepare_wm_surfaces,
+ .prepare = prepare_surfaces,
};