diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 89 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 |
4 files changed, 98 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index cdbbb557960..9d64a4009d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -188,4 +188,93 @@ vec4_visitor::split_uniform_registers()
    }
 }
 
+void
+vec4_visitor::pack_uniform_registers()
+{
+   bool uniform_used[this->uniforms];
+   int new_loc[this->uniforms];
+   int new_chan[this->uniforms];
+
+   memset(uniform_used, 0, sizeof(uniform_used));
+   memset(new_loc, 0, sizeof(new_loc));
+   memset(new_chan, 0, sizeof(new_chan));
+
+   /* Find which uniform vectors are actually used by the program. We
+    * expect unused vector elements when we've moved array access out
+    * to pull constants, and from some GLSL code generators like wine.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+         if (inst->src[i].file != UNIFORM)
+            continue;
+
+         uniform_used[inst->src[i].reg] = true;
+      }
+   }
+
+   int new_uniform_count = 0;
+
+   /* Now, figure out a packing of the live uniform vectors into our
+    * push constants.
+    */
+   for (int src = 0; src < uniforms; src++) {
+      int size = this->uniform_vector_size[src];
+
+      if (!uniform_used[src]) {
+         this->uniform_vector_size[src] = 0;
+         continue;
+      }
+
+      int dst;
+      /* Find the lowest place we can slot this uniform in. */
+      for (dst = 0; dst < src; dst++) {
+         if (this->uniform_vector_size[dst] + size <= 4)
+            break;
+      }
+
+      if (src == dst) {
+         new_loc[src] = dst;
+         new_chan[src] = 0;
+      } else {
+         new_loc[src] = dst;
+         new_chan[src] = this->uniform_vector_size[dst];
+
+         /* Move the references to the data */
+         for (int j = 0; j < size; j++) {
+            c->prog_data.param[dst * 4 + new_chan[src] + j] =
+               c->prog_data.param[src * 4 + j];
+         }
+
+         this->uniform_vector_size[dst] += size;
+         this->uniform_vector_size[src] = 0;
+      }
+
+      new_uniform_count = MAX2(new_uniform_count, dst + 1);
+   }
+
+   this->uniforms = new_uniform_count;
+
+   /* Now, update the instructions for our repacked uniforms. */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+         int src = inst->src[i].reg;
+
+         if (inst->src[i].file != UNIFORM)
+            continue;
+
+         inst->src[i].reg = new_loc[src];
+
+         int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
+         int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
+         int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
+         int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
+         inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+      }
+   }
+}
+
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 945eea576b1..327977357f7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -389,6 +389,7 @@ public:
    void move_grf_array_access_to_scratch();
    void move_uniform_array_access_to_pull_constants();
    void split_uniform_registers();
+   void pack_uniform_registers();
    void calculate_live_intervals();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 49514070f34..f084a7f7e4a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -128,9 +128,6 @@ vec4_visitor::setup_uniforms(int reg)
       reg += ALIGN(uniforms, 2) / 2;
    }
 
-   /* for now, we are not doing any elimination of unused slots, nor
-    * are we packing our uniforms.
-    */
    c->prog_data.nr_params = this->uniforms * 4;
 
    c->prog_data.curb_read_length = reg - 1;
@@ -607,6 +604,8 @@ vec4_visitor::run()
       progress = dead_code_eliminate() || progress;
    } while (progress);
 
+   pack_uniform_registers();
+
    if (failed)
       return false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index dc11d9883ca..69399045d85 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -136,6 +136,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
    /* The gen6 math instruction ignores the source modifiers --
     * swizzle, abs, negate, and at least some parts of the register
     * region description.
+    *
+    * While it would seem that this MOV could be avoided at this point
+    * in the case that the swizzle is matched up with the destination
+    * writemask, note that uniform packing and register allocation
+    * could rearrange our swizzle, so let's leave this matter up to
+    * copy propagation later.
     */
    src_reg temp_src = src_reg(this, glsl_type::vec4_type);
    emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);