diff options
author | Jason Ekstrand <[email protected]> | 2015-11-25 09:36:34 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-12-14 15:58:09 -0800 |
commit | a487f0284f618416e74f73ca2534d8d93c26531c (patch) | |
tree | 80ffa6ea3225248e6acbca93187bdce93a12e726 /src/mesa/drivers/dri | |
parent | 46f5396846e5c4d1874c5af83378b91230f0d9da (diff) |
i965/vec4: Use MOV_INDIRECT instead of reladdr for indirect push constants
This commit moves us to an instruction based model rather than a
register-based model for indirects. This is more accurate anyway as we
have to emit instructions to resolve the reladdr. It's also a lot simpler
because it gets rid of the recursive reladdr problem by design.
One side-effect of this is that we need a whole new algorithm in
move_uniform_array_access_to_pull_constants. This new algorithm is much
more straightforward than the old one and is fairly similar to what we're
already doing in the FS backend.
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 86 |
4 files changed, 50 insertions, 51 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a697bdf84a0..e4a405b8705 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -775,7 +775,7 @@ vec4_visitor::move_push_constants_to_pull_constants() dst_reg temp = dst_reg(this, glsl_type::vec4_type); emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + pull_constant_loc[uniform], src_reg()); inst->src[i].file = temp.file; inst->src[i].nr = temp.nr; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f2e5ce18ab4..e6d6c8218be 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -293,7 +293,8 @@ public: void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, - int base_offset); + int base_offset, + src_reg indirect); void emit_pull_constant_load_reg(dst_reg dst, src_reg surf_index, src_reg offset, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f965b39360f..58b6612de9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -673,12 +673,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Offsets are in bytes but they should always be multiples of 16 */ assert(const_offset->u[0] % 16 == 0); src.reg_offset = const_offset->u[0] / 16; + + emit(MOV(dest, src)); } else { - src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); - src.reladdr = new(mem_ctx) src_reg(tmp); - } + src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); - emit(MOV(dest, src)); + emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, + indirect, brw_imm_ud(instr->const_index[1])); + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 7712d3471ea..bc7b30df2ad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1641,16 +1641,16 @@ vec4_visitor::move_grf_array_access_to_scratch() void vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, - int base_offset) + int base_offset, src_reg indirect) { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; src_reg offset; - if (orig_src.reladdr) { + if (indirect.file != BAD_FILE) { offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, ADD(dst_reg(offset), *orig_src.reladdr, + emit_before(block, inst, ADD(dst_reg(offset), indirect, brw_imm_d(reg_offset * 16))); } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ @@ -1685,59 +1685,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() { int pull_constant_loc[this->uniforms]; memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); - bool nested_reladdr; - /* Walk through and find array access of uniforms. Put a copy of that - * uniform in the pull constant buffer. - * - * Note that we don't move constant-indexed accesses to arrays. No - * testing has been done of the performance impact of this choice. + /* First, walk through the instructions and determine which things need to + * be pulled. We mark something as needing to be pulled by setting + * pull_constant_loc to 0. */ - do { - nested_reladdr = false; - - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { - for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) - continue; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - int uniform = inst->src[i].nr; + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - if (inst->src[i].reladdr->reladdr) - nested_reladdr = true; /* will need another pass */ + for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++) + pull_constant_loc[uniform_nr + j] = 0; + } - /* If this array isn't already present in the pull constant buffer, - * add it. - */ - if (pull_constant_loc[uniform] == -1) { - const gl_constant_value **values = - &stage_prog_data->param[uniform * 4]; + /* Next, we walk the list of uniforms and assign real pull constant + * locations and set their corresponding entries in pull_param. + */ + for (int j = 0; j < this->uniforms; j++) { + if (pull_constant_loc[j] < 0) + continue; - pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4; + pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4; - assert(uniform < uniform_array_size); - for (int j = 0; j < uniform_size[uniform] * 4; j++) { - stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] - = values[j]; - } - } + for (int i = 0; i < 4; i++) { + stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] + = stage_prog_data->param[j * 4 + i]; + } + } - /* Set up the annotation tracking for new generated instructions. */ - base_ir = inst->ir; - current_annotation = inst->annotation; + /* Finally, we can walk through the instructions and lower MOV_INDIRECT + * instructions to actual uniform pulls. + */ + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + /* We only care about MOV_INDIRECT of a uniform */ + if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT || + inst->src[0].file != UNIFORM) + continue; - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset; - emit_pull_constant_load(block, inst, temp, inst->src[i], - pull_constant_loc[uniform]); + assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP); - inst->src[i].file = temp.file; - inst->src[i].nr = temp.nr; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; - } - } - } while (nested_reladdr); + emit_pull_constant_load(block, inst, inst->dst, inst->src[0], + pull_constant_loc[uniform_nr], inst->src[1]); + inst->remove(block); + } /* Now there are no accesses of the UNIFORM file with a reladdr, so * no need to track them as larger-than-vec4 objects. This will be |