diff options
-rw-r--r-- | src/glsl/nir/glsl_to_nir.cpp | 8 | ||||
-rw-r--r-- | src/glsl/nir/nir_intrinsics.h | 27 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 96 |
3 files changed, 76 insertions, 55 deletions
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 70f0c851f2a..78070afe8e5 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -897,11 +897,11 @@ nir_visitor::visit(ir_expression *ir) } nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); load->num_components = ir->type->vector_elements; - load->const_index[0] = ir->operands[0]->as_constant()->value.u[0]; - load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */ - load->const_index[2] = 1; /* number of vec4's */ + load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ + load->const_index[1] = 1; /* number of vec4's */ + load->src[0] = evaluate_rvalue(ir->operands[0]); if (!const_index) - load->src[0] = evaluate_rvalue(ir->operands[1]); + load->src[1] = evaluate_rvalue(ir->operands[1]); add_instr(&load->instr, ir->type->vector_elements); /* diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index e66273d3847..d94866c8597 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -101,11 +101,11 @@ SYSTEM_VALUE(invocation_id, 1) /* * The first index is the address to load from, and the second index is the - * number of array elements to load. For UBO's (and SSBO's), the first index - * is the UBO buffer index (TODO nonconstant UBO buffer index) and the second - * and third indices play the role of the first and second indices in the other - * loads. Indirect loads have an additional register input, which is added - * to the constant address to compute the final address to load from. + * number of array elements to load. Indirect loads have an additional + * register input, which is added to the constant address to compute the + * final address to load from. For UBO's (and SSBO's), the first source is + * the (possibly constant) UBO buffer index and the indirect (if it exists) + * is the second source. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -113,16 +113,15 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. */ -#define LOAD(name, num_indices, flags) \ - INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \ - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ - INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \ - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ +#define LOAD(name, extra_srcs, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \ + INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ + true, 0, 0, 2, flags) -LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) -/* LOAD(ssbo, 2, 0) */ +LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* LOAD(ssbo, 1, 0) */ /* * Stores work the same way as loads, except now the first register input is diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 56e180a138a..a5360754648 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1344,52 +1344,74 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_load_ubo_indirect: + has_indirect = true; case nir_intrinsic_load_ubo: { - fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + - (unsigned) instr->const_index[0]); - fs_reg packed_consts = fs_reg(this, glsl_type::float_type); - packed_consts.type = dest.type; - - fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, surf_index, const_offset_reg)); + nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); + fs_reg surf_index; - for (unsigned i = 0; i < instr->num_components; i++) { - packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i); + if (const_index) { + surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + + const_index->u[0]); + } else { + /* The block index is not a constant. Evaluate the index expression + * per-channel and add the base UBO index; the generator will select + * a value from any live channel. + */ + surf_index = fs_reg(this, glsl_type::uint_type); + emit(ADD(surf_index, get_nir_src(instr->src[0]), + fs_reg(stage_prog_data->binding_table.ubo_start))) + ->force_writemask_all = true; - /* The std140 packing rules don't allow vectors to cross 16-byte - * boundaries, and a reg is 32 bytes. + /* Assume this may touch any UBO. It would be nice to provide + * a tighter bound, but the array information is already lowered away. */ - assert(packed_consts.subreg_offset < 32); + brw_mark_surface_used(prog_data, + stage_prog_data->binding_table.ubo_start + + shader_prog->NumUniformBlocks - 1); + } - fs_inst *inst = MOV(dest, packed_consts); - if (instr->has_predicate) - inst->predicate = BRW_PREDICATE_NORMAL; - emit(inst); + if (has_indirect) { + /* Turn the byte offset into a dword offset. */ + fs_reg base_offset = fs_reg(this, glsl_type::int_type); + emit(SHR(base_offset, retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D), + fs_reg(2))); - dest.reg_offset++; - } - break; - } + unsigned vec4_offset = instr->const_index[0] / 4; + for (int i = 0; i < instr->num_components; i++) { + exec_list list = VARYING_PULL_CONSTANT_LOAD(offset(dest, i), + surf_index, base_offset, + vec4_offset + i); - case nir_intrinsic_load_ubo_indirect: { - fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + - instr->const_index[0]); - /* Turn the byte offset into a dword offset. */ - unsigned base_offset = instr->const_index[1] / 4; - fs_reg offset = fs_reg(this, glsl_type::int_type); - emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D), - fs_reg(2))); + fs_inst *last_inst = (fs_inst *) list.get_tail(); + if (instr->has_predicate) + last_inst->predicate = BRW_PREDICATE_NORMAL; + emit(list); + } + } else { + fs_reg packed_consts = fs_reg(this, glsl_type::float_type); + packed_consts.type = dest.type; - for (unsigned i = 0; i < instr->num_components; i++) { - exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index, - offset, base_offset + i); - fs_inst *last_inst = (fs_inst *) list.get_tail(); - if (instr->has_predicate) - last_inst->predicate = BRW_PREDICATE_NORMAL; - emit(list); + fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); + emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, + surf_index, const_offset_reg); - dest.reg_offset++; + for (unsigned i = 0; i < instr->num_components; i++) { + packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i); + + /* The std140 packing rules don't allow vectors to cross 16-byte + * boundaries, and a reg is 32 bytes. + */ + assert(packed_consts.subreg_offset < 32); + + fs_inst *inst = MOV(dest, packed_consts); + if (instr->has_predicate) + inst->predicate = BRW_PREDICATE_NORMAL; + emit(inst); + + dest.reg_offset++; + } } break; } |