diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 75 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 32 |
5 files changed, 114 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 40571a4d54d..ab206d1920f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -676,10 +676,12 @@ enum opcode { FS_OPCODE_SPILL, FS_OPCODE_UNSPILL, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, + FS_OPCODE_SET_GLOBAL_OFFSET, VS_OPCODE_URB_WRITE, VS_OPCODE_SCRATCH_READ, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9a18410ac5f..83128117328 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -330,7 +330,9 @@ fs_inst::is_math() bool fs_inst::is_send_from_grf() { - return opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 || + (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD && + src[1].file == GRF)); } bool diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b75314cd665..87257123f27 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -529,6 +529,10 @@ private: void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset); + void generate_uniform_pull_constant_load_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset); void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index); void generate_varying_pull_constant_load_gen7(fs_inst *inst, @@ -536,6 +540,10 @@ private: struct brw_reg index, struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); + void generate_set_global_offset(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg offset); void generate_discard_jump(fs_inst *inst); void patch_discard_jumps_to_fb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 9a891414e62..63f09fe7941 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -666,6 +666,44 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, } void +fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg index, + struct brw_reg offset) +{ + assert(inst->mlen == 0); + + assert(index.file == BRW_IMMEDIATE_VALUE && + index.type == BRW_REGISTER_TYPE_UD); + uint32_t surf_index = index.dw1.ud; + + assert(offset.file == BRW_GENERAL_REGISTER_FILE); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_pop_insn_state(p); + + brw_set_dest(p, send, dst); + brw_set_src0(p, send, offset); + if (intel->gen < 6) + send->header.destreg__conditionalmod = inst->base_mrf; + + uint32_t msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + uint32_t msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ; + bool header_present = true; + brw_set_dp_read_message(p, send, + surf_index, + msg_control, + msg_type, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, + header_present, + 1); +} + +void fs_generator::generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index) @@ -852,6 +890,35 @@ brw_reg_from_fs_reg(fs_reg *reg) return brw_reg; } +/** + * Sets the second dword of a vgrf for gen7+ message setup. + * + * For setting up gen7 messages in VGRFs, we need to be able to set the second + * dword for some payloads where in the MRF world we'd have just used + * brw_message_reg(). We don't want to bake it into the send message's code + * generation because that means we don't get a chance to schedule the + * instructions. + */ +void +fs_generator::generate_set_global_offset(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg value) +{ + /* We use a matching src and dst to get the information on how this + * instruction works exposed to various optimization passes that would + * otherwise treat it as completely overwriting the dst. + */ + assert(src.file == dst.file && src.nr == dst.nr); + assert(value.file == BRW_IMMEDIATE_VALUE); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 2), value.type), value); + brw_pop_insn_state(p); +} + void fs_generator::generate_code(exec_list *instructions) { @@ -1127,6 +1194,10 @@ fs_generator::generate_code(exec_list *instructions) generate_uniform_pull_constant_load(inst, dst, src[0], src[1]); break; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: + generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]); + break; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: generate_varying_pull_constant_load(inst, dst, src[0]); break; @@ -1151,6 +1222,10 @@ fs_generator::generate_code(exec_list *instructions) brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME); break; + case FS_OPCODE_SET_GLOBAL_OFFSET: + generate_set_global_offset(inst, dst, src[0], src[1]); + break; + default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(ctx, "Unsupported opcode `%s' in FS", diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ccf905ebc62..6a39f98509e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -581,12 +581,32 @@ fs_visitor::visit(ir_expression *ir) if (const_offset) { fs_reg packed_consts = fs_reg(this, glsl_type::float_type); packed_consts.type = result.type; - fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, - surf_index, - fs_reg(const_offset->value.u[0]))); - pull->base_mrf = 14; - pull->mlen = 1; + + if (intel->gen >= 7) { + fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16); + fs_reg payload = fs_reg(this, glsl_type::uint_type); + struct brw_reg g0 = retype(brw_vec8_grf(0, 0), + BRW_REGISTER_TYPE_UD); + fs_inst *setup = emit(MOV(payload, fs_reg(g0))); + setup->force_writemask_all = true; + /* We don't need the second half of this vgrf to be filled with g1 + * in the 16-wide case, but if we use force_uncompressed then live + * variable analysis won't consider this a def! + */ + + emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload, + payload, const_offset_reg); + emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts, + surf_index, payload); + } else { + fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]); + fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + packed_consts, + surf_index, + const_offset_reg)); + pull->base_mrf = 14; + pull->mlen = 1; + } packed_consts.smear = const_offset->value.u[0] % 16 / 4; for (int i = 0; i < ir->type->vector_elements; i++) { |