diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 42 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 78 |
4 files changed, 36 insertions, 91 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 72b6df6555e..341f543dbdb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2266,7 +2266,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port Data Cache (const buffer). + * Read a float[4] vector from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2278,8 +2278,7 @@ void brw_oword_block_read(struct brw_codegen *p, { const struct gen_device_info *devinfo = p->devinfo; const unsigned target_cache = - (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : - devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_SAMPLER_CACHE : + (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_DATAPORT_READ_TARGET_DATA_CACHE); /* On newer hardware, offset is in units of owords. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 50266ad9071..b22dc9a1a7b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3202,44 +3202,18 @@ fs_visitor::lower_uniform_pull_constant_loads() continue; if (devinfo->gen >= 7) { - /* The offset arg is a vec4-aligned immediate byte offset. */ - fs_reg const_offset_reg = inst->src[1]; - assert(const_offset_reg.file == IMM && - const_offset_reg.type == BRW_REGISTER_TYPE_UD); - assert(const_offset_reg.ud % 16 == 0); - - fs_reg payload, offset; - if (devinfo->gen >= 9) { - /* We have to use a message header on Skylake to get SIMD4x2 - * mode. Reserve space for the register. - */ - offset = payload = fs_reg(VGRF, alloc.allocate(2)); - offset.offset += REG_SIZE; - inst->mlen = 2; - } else { - offset = payload = fs_reg(VGRF, alloc.allocate(1)); - inst->mlen = 1; - } - - /* This is actually going to be a MOV, but since only the first dword - * is accessed, we have a special opcode to do just that one. Note - * that this needs to be an operation that will be considered a def - * by live variable analysis, or register allocation will explode. - */ - fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET, - 8, offset, const_offset_reg); - setup->force_writemask_all = true; + const fs_builder ubld = fs_builder(this, block, inst).exec_all(); + const fs_reg payload = ubld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD); - setup->ir = inst->ir; - setup->annotation = inst->annotation; - inst->insert_before(block, setup); + ubld.group(8, 0).MOV(payload, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + ubld.group(1, 0).MOV(component(payload, 2), + brw_imm_ud(inst->src[1].ud / 16)); - /* Similarly, this will only populate the first 4 channels of the - * result register (since we only use smear values from 0-3), but we - * don't tell the optimizer. - */ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; inst->src[1] = payload; + inst->header_size = 1; + inst->mlen = 1; invalidate_live_intervals(); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 91c3985b1d6..941c05f27e1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -421,7 +421,7 @@ private: void generate_uniform_pull_constant_load_gen7(fs_inst *inst, struct brw_reg dst, struct brw_reg surf_index, - struct brw_reg offset); + struct brw_reg payload); void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4ef1a292b11..8b9fa8e504b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1145,42 +1145,13 @@ void fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, struct brw_reg dst, struct brw_reg index, - struct brw_reg offset) + struct brw_reg payload) { assert(index.type == BRW_REGISTER_TYPE_UD); - - assert(offset.file == BRW_GENERAL_REGISTER_FILE); - /* Reference just the dword we need, to avoid angering validate_reg(). */ - offset = brw_vec1_grf(offset.nr, 0); - - /* We use the SIMD4x2 mode because we want to end up with 4 components in - * the destination loaded consecutively from the same offset (which appears - * in the first component, and the rest are ignored). - */ - dst.width = BRW_WIDTH_4; - - struct brw_reg src = offset; - bool header_present = false; - - if (devinfo->gen >= 9) { - /* Skylake requires a message header in order to use SIMD4x2 mode. */ - src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD); - header_present = true; - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_set_default_access_mode(p, BRW_ALIGN_1); - - brw_MOV(p, get_element_ud(src, 2), - brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); - brw_pop_insn_state(p); - } + assert(payload.file == BRW_GENERAL_REGISTER_FILE); if (index.file == BRW_IMMEDIATE_VALUE) { - - uint32_t surf_index = index.ud; + const uint32_t surf_index = index.ud; brw_push_insn_state(p); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); @@ -1189,19 +1160,18 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4); brw_pop_insn_state(p); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, send, src); - brw_set_sampler_message(p, send, + brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD))); + brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD))); + brw_set_dp_read_message(p, send, surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - inst->mlen, - header_present, - BRW_SAMPLER_SIMD_MODE_SIMD4X2, - 0); - } else { + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + GEN7_DATAPORT_DC_OWORD_BLOCK_READ, + GEN6_SFID_DATAPORT_CONSTANT_CACHE, + 1, /* mlen */ + true, /* header */ + 1); /* rlen */ + } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); @@ -1217,16 +1187,18 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, /* dst = send(payload, a0.0 | <descriptor>) */ brw_inst *insn = brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr); - brw_set_sampler_message(p, insn, - 0, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - 1, /* rlen */ - inst->mlen, - header_present, - BRW_SAMPLER_SIMD_MODE_SIMD4X2, - 0); + p, GEN6_SFID_DATAPORT_CONSTANT_CACHE, + vec4(retype(dst, BRW_REGISTER_TYPE_UD)), + vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr); + brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4); + brw_set_dp_read_message(p, insn, + 0, /* surface */ + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + GEN7_DATAPORT_DC_OWORD_BLOCK_READ, + GEN6_SFID_DATAPORT_CONSTANT_CACHE, + 1, /* mlen */ + true, /* header */ + 1); /* rlen */ brw_pop_insn_state(p); } |