summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp89
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp6
4 files changed, 98 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index cdbbb557960..9d64a4009d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -188,4 +188,93 @@ vec4_visitor::split_uniform_registers()
}
}
+void
+vec4_visitor::pack_uniform_registers()
+{ /* Repack the uniform vectors so that only the ones the program
+ * actually reads take up space, and so that small uniforms share a
+ * vector where they fit. Works in three passes: (1) mark which
+ * uniform registers are named by any instruction source, (2) pick
+ * a new register and starting channel for every marked uniform,
+ * moving its c->prog_data.param[] entries and updating
+ * uniform_vector_size[] and this->uniforms to match, and (3)
+ * rewrite the UNIFORM entries among each instruction's three
+ * src[] operands (register number and swizzle) to the new
+ * location.
+ *
+ * Takes no arguments and returns nothing; all results are stored
+ * in this->uniforms, this->uniform_vector_size[],
+ * c->prog_data.param[] and the instruction list.
+ *
+ * NOTE(review): the three scratch arrays below are variable-length
+ * arrays sized by this->uniforms (a compiler extension in C++).
+ */
+ bool uniform_used[this->uniforms]; /* set if an instruction reads this uniform */
+ int new_loc[this->uniforms]; /* register each old uniform ends up in */
+ int new_chan[this->uniforms]; /* first channel it occupies in that register */
+
+ memset(uniform_used, 0, sizeof(uniform_used));
+ memset(new_loc, 0, sizeof(new_loc));
+ memset(new_chan, 0, sizeof(new_chan));
+
+ /* Find which uniform vectors are actually used by the program. We
+ * expect unused vector elements when we've moved array access out
+ * to pull constants, and from some GLSL code generators like wine.
+ *
+ * Only the three src[] operands of each instruction are checked,
+ * and a uniform counts as used if any of them names its register.
+ */
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ for (int i = 0 ; i < 3; i++) {
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ uniform_used[inst->src[i].reg] = true;
+ }
+ }
+
+ int new_uniform_count = 0;
+
+ /* Now, figure out a packing of the live uniform vectors into our
+ * push constants.
+ *
+ * Uniforms are visited in increasing register order, and
+ * uniform_vector_size[] is updated in place as we go, so each one
+ * sees the space left over by the uniforms placed before it. The
+ * statement order inside this loop matters for that reason.
+ */
+ for (int src = 0; src < uniforms; src++) {
+ int size = this->uniform_vector_size[src]; /* param[] entries this uniform occupies */
+
+ if (!uniform_used[src]) {
+ this->uniform_vector_size[src] = 0; /* leaves the register free for later uniforms */
+ continue;
+ }
+
+ int dst;
+ /* Find the lowest place we can slot this uniform in. Only
+ * registers below src are candidates; if none has room for
+ * size more channels (a vector holds 4), the loop ends with
+ * dst == src and the uniform stays where it is.
+ */
+ for (dst = 0; dst < src; dst++) {
+ if (this->uniform_vector_size[dst] + size <= 4)
+ break;
+ }
+
+ if (src == dst) { /* nothing earlier had room: keep it in place */
+ new_loc[src] = dst;
+ new_chan[src] = 0;
+ } else {
+ new_loc[src] = dst;
+ new_chan[src] = this->uniform_vector_size[dst]; /* append after what dst already holds */
+
+ /* Move the references to the data. param[] is indexed as
+ * register * 4 + channel, so this copies the size entries
+ * of src to their new channels in dst.
+ */
+ for (int j = 0; j < size; j++) {
+ c->prog_data.param[dst * 4 + new_chan[src] + j] =
+ c->prog_data.param[src * 4 + j];
+ }
+
+ this->uniform_vector_size[dst] += size; /* dst now holds this uniform as well */
+ this->uniform_vector_size[src] = 0; /* src is empty and may be reused */
+ }
+
+ new_uniform_count = MAX2(new_uniform_count, dst + 1); /* highest register in use, plus one */
+ }
+
+ this->uniforms = new_uniform_count; /* every live uniform now sits below this register */
+
+ /* Now, update the instructions for our repacked uniforms. The
+ * register number is replaced by the new location, and each of
+ * the four swizzle components is shifted up by the uniform's new
+ * starting channel.
+ *
+ * NOTE(review): the shifted components are not range-checked
+ * here; this presumably relies on a uniform's swizzle never
+ * selecting channels beyond its own size -- confirm.
+ */
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ for (int i = 0 ; i < 3; i++) {
+ int src = inst->src[i].reg; /* old register; only used once file is known to be UNIFORM */
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ inst->src[i].reg = new_loc[src];
+
+ int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
+ int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
+ int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
+ int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
+ inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+ }
+ }
+}
+
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 945eea576b1..327977357f7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -389,6 +389,7 @@ public:
void move_grf_array_access_to_scratch();
void move_uniform_array_access_to_pull_constants();
void split_uniform_registers();
+ void pack_uniform_registers(); /* drops unused uniform vectors and packs the live ones together */
void calculate_live_intervals();
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 49514070f34..f084a7f7e4a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -128,9 +128,6 @@ vec4_visitor::setup_uniforms(int reg)
reg += ALIGN(uniforms, 2) / 2;
}
- /* for now, we are not doing any elimination of unused slots, nor
- * are we packing our uniforms.
- */
c->prog_data.nr_params = this->uniforms * 4;
c->prog_data.curb_read_length = reg - 1;
@@ -607,6 +604,8 @@ vec4_visitor::run()
progress = dead_code_eliminate() || progress;
} while (progress);
+ pack_uniform_registers();
+
if (failed)
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index dc11d9883ca..69399045d85 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -136,6 +136,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
/* The gen6 math instruction ignores the source modifiers --
* swizzle, abs, negate, and at least some parts of the register
* region description.
+ *
+ * While it would seem that this MOV could be avoided at this point
+ * in the case that the swizzle is matched up with the destination
+ * writemask, note that uniform packing and register allocation
+ * could rearrange our swizzle, so let's leave this matter up to
+ * copy propagation later.
*/
src_reg temp_src = src_reg(this, glsl_type::vec4_type);
emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);