diff options
author | Jason Ekstrand <[email protected]> | 2015-11-24 15:12:20 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-04-14 15:59:33 -0700 |
commit | 3c93cdfaf598bc3c28e3dc288da35675c666602b (patch) | |
tree | b1a89539d10149b2838517e32bea49cb13f1c6b2 /src/mesa | |
parent | 63101177f32e7ebcaa6c71e046b3e599d21a20b5 (diff) |
i965/fs: Use MOV_INDIRECT for all indirect uniform loads
Instead of using reladdr, this commit changes the FS backend to emit a
MOV_INDIRECT whenever we need an indirect uniform load. We also have to
rework some of the other bits of the backend to handle this new form of
uniform load. The obvious change is that demote_pull_constants now acts
more like a lowering pass when it hits a MOV_INDIRECT.
Reviewed-by: Kristian Høgsberg <[email protected]>
Acked-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 72 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 55 |
2 files changed, 87 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0a2d769653f..38c6e966f39 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1963,8 +1963,8 @@ fs_visitor::assign_constant_locations() if (inst->src[i].file != UNIFORM) continue; - if (inst->src[i].reladdr) { - int uniform = inst->src[i].nr; + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { + int uniform = inst->src[0].nr; /* If this array isn't already present in the pull constant buffer, * add it. @@ -2046,49 +2046,63 @@ fs_visitor::assign_constant_locations() void fs_visitor::demote_pull_constants() { - foreach_block_and_inst (block, fs_inst, inst, cfg) { + const unsigned index = stage_prog_data->binding_table.pull_constants_start; + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { + /* Set up the annotation tracking for new generated instructions. */ + const fs_builder ibld(this, block, inst); + for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM) continue; - int pull_index; + /* We'll handle this case later */ + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) + continue; + unsigned location = inst->src[i].nr + inst->src[i].reg_offset; - if (location >= uniforms) /* Out of bounds access */ - pull_index = -1; - else - pull_index = pull_constant_loc[location]; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; if (pull_index == -1) continue; - /* Set up the annotation tracking for new generated instructions. */ - const fs_builder ibld(this, block, inst); - const unsigned index = stage_prog_data->binding_table.pull_constants_start; - fs_reg dst = vgrf(glsl_type::float_type); - assert(inst->src[i].stride == 0); - /* Generate a pull load into dst. */ - if (inst->src[i].reladdr) { - VARYING_PULL_CONSTANT_LOAD(ibld, dst, - brw_imm_ud(index), - *inst->src[i].reladdr, - pull_index * 4); - inst->src[i].reladdr = NULL; - inst->src[i].stride = 1; - } else { - const fs_builder ubld = ibld.exec_all().group(8, 0); - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); - ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, brw_imm_ud(index), offset); - inst->src[i].set_smear(pull_index & 3); - } - brw_mark_surface_used(prog_data, index); + fs_reg dst = vgrf(glsl_type::float_type); + const fs_builder ubld = ibld.exec_all().group(8, 0); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + dst, brw_imm_ud(index), offset); /* Rewrite the instruction to use the temporary VGRF. */ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; inst->src[i].reg_offset = 0; + inst->src[i].set_smear(pull_index & 3); + + brw_mark_surface_used(prog_data, index); + } + + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && + inst->src[0].file == UNIFORM) { + + unsigned location = inst->src[0].nr + inst->src[0].reg_offset; + if (location >= uniforms) + continue; /* Out of bounds access */ + + int pull_index = pull_constant_loc[location]; + assert(pull_index >= 0); /* This had better be pull */ + + VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, + brw_imm_ud(index), + inst->src[1], + pull_index * 4); + inst->remove(block); + + brw_mark_surface_used(prog_data, index); } } invalidate_live_intervals(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 698894be122..59b1376d730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1266,6 +1266,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { fs_reg image(UNIFORM, deref->var->data.driver_location / 4, BRW_REGISTER_TYPE_UD); + fs_reg indirect; + unsigned indirect_max = 0; for (const nir_deref *tail = &deref->deref; tail->child; tail = tail->child) { @@ -1277,7 +1279,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) image = offset(image, bld, base * element_size); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - fs_reg tmp = vgrf(glsl_type::int_type); + fs_reg tmp = vgrf(glsl_type::uint_type); if (devinfo->gen == 7 && !devinfo->is_haswell) { /* IVB hangs when trying to access an invalid surface index with @@ -1295,15 +1297,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) bld.MOV(tmp, get_nir_src(deref_array->indirect)); } + indirect_max += element_size * (tail->type->length - 1); + bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); - if (image.reladdr) - bld.ADD(*image.reladdr, *image.reladdr, tmp); - else - image.reladdr = new(mem_ctx) fs_reg(tmp); + if (indirect.file == BAD_FILE) { + indirect = tmp; + } else { + bld.ADD(indirect, indirect, tmp); + } } } - return image; + if (indirect.file == BAD_FILE) { + return image; + } else { + /* Emit a pile of MOVs to load the uniform into a temporary. The + * dead-code elimination pass will get rid of what we don't use. + */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE); + for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(tmp, bld, j), offset(image, bld, j), + indirect, brw_imm_ud((indirect_max + 1) * 4)); + } + return tmp; + } } void @@ -2678,13 +2696,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Offsets are in bytes but they should always be multiples of 4 */ assert(const_offset->u32[0] % 4 == 0); src.reg_offset = const_offset->u32[0] / 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + } } else { - src.reladdr = new(mem_ctx) fs_reg(retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_UD)); - } + fs_reg indirect = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - for (unsigned j = 0; j < instr->num_components; j++) { - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); + /* We need to pass a size to the MOV_INDIRECT but we don't want it to + * go past the end of the uniform. In order to keep the n'th + * component from running past, we subtract off the size of all but + * one component of the vector. + */ + assert(instr->const_index[1] >= instr->num_components * 4); + unsigned read_size = instr->const_index[1] - + (instr->num_components - 1) * 4; + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.emit(SHADER_OPCODE_MOV_INDIRECT, + offset(dest, bld, j), offset(src, bld, j), + indirect, brw_imm_ud(read_size)); + } } break; } |