aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2015-08-14 16:01:33 -0700
committer: Kenneth Graunke <[email protected]> 2015-10-12 14:33:26 -0700
commit: bd198b9f0a292a9ff4ffffec3a29bad23d62caba (patch)
tree: 2f9dd5581ddc6c1f5de3a40757b6cf0712a92199
parent: bf97f8d467ad1d485c2327da3f4fe1f9e1dc7379 (diff)
i965/vs: Simplify fs_visitor's ATTR file.
Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of compilation, assign_vs_urb_setup() translated those into GRF units, and converted ATTR to HW_REGs. This patch moves the translation earlier, making ATTR work in terms of GRF units from the beginning. assign_vs_urb_setup() simply has to add the number of payload registers and push constants to obtain the final hardware GRF number. (We can't do this earlier as those values aren't known.) ATTR still supports reg_offset; however, it's simply added to reg. It's not clear whether this is valuable or not. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 40
3 files changed, 49 insertions, 21 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 65f2e68e621..d000f16f49a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1508,9 +1508,11 @@ void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
+ int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+ if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
+ count++;
/* Each attribute is 4 regs. */
this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
@@ -1521,25 +1523,10 @@ fs_visitor::assign_vs_urb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
-
- if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = vs_prog_data->nr_attributes - 1;
- } else {
- /* Attributes come in in a contiguous block, ordered by their
- * gl_vert_attrib value. That means we can compute the slot
- * number for an attribute by masking out the enabled
- * attributes before it and counting the bits.
- */
- attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
- slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
- BITFIELD64_MASK(attr));
- }
-
- channel = inst->src[i].reg_offset & 3;
-
- grf = payload.num_regs +
- prog_data->curb_read_length +
- slot * 4 + channel;
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].reg +
+ inst->src[i].reg_offset;
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index df1a7ed9b59..8aee2c087f7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -53,7 +53,8 @@ fs_reg *
fs_visitor::emit_vs_system_value(int location)
{
fs_reg *reg = new(this->mem_ctx)
- fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
+ fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read),
+ BRW_REGISTER_TYPE_D);
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
switch (location) {
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 15c1b1984a1..4f35d81fc7e 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,6 +27,34 @@
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{
+ GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+ assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
+
+ if (intrin->intrinsic == nir_intrinsic_load_input) {
+ /* Attributes come in a contiguous block, ordered by their
+ * gl_vert_attrib value. That means we can compute the slot
+ * number for an attribute by masking out the enabled attributes
+ * before it and counting the bits.
+ */
+ int attr = intrin->const_index[0];
+ int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+ intrin->const_index[0] = 4 * slot;
+ }
+ }
+ return true;
+}
+
static void
brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
{
@@ -49,6 +77,18 @@ brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
* type_size_vec4 here.
*/
nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ /* Finally, translate VERT_ATTRIB_* values into the actual registers.
+ *
+ * Note that we can use nir->info.inputs_read instead of key->inputs_read
+ * since the two are identical aside from Gen4-5 edge flag differences.
+ */
+ GLbitfield64 inputs_read = nir->info.inputs_read;
+ nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+ }
+ }
break;
case MESA_SHADER_GEOMETRY:
foreach_list_typed(nir_variable, var, node, &nir->inputs) {