diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 21 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 608 |
2 files changed, 0 insertions, 629 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2de97eb25e2..6b75a8dbc3d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -208,27 +208,6 @@ public: void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); fs_reg rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit); - fs_inst *emit_texture_gen4(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - uint32_t sampler); - fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, fs_reg lod, - uint32_t sampler); - fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, uint32_t sampler, - bool has_offset); - fs_inst *emit_texture_gen7(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, fs_reg mcs, fs_reg sampler, - fs_reg offset_value); void emit_texture(ir_texture_opcode op, const glsl_type *dest_type, fs_reg coordinate, int components, diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0cfce9f3640..5f0549c3bc0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -77,614 +77,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -fs_inst * -fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_c, - fs_reg lod, fs_reg dPdy, int grad_components, - uint32_t sampler) -{ - int mlen; - int base_mrf = 1; - bool simd16 = false; - fs_reg orig_dst; - - /* g0 header. */ - mlen = 1; - - if (shadow_c.file != BAD_FILE) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, bld, 1); - } - - /* gen4's SIMD8 sampler always has the slots for u,v,r present. - * the unused slots must be zeroed. - */ - for (int i = coord_components; i < 3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); - } - mlen += 3; - - if (op == ir_tex) { - /* There's no plain shadow compare message, so we use shadow - * compare with a bias of 0.0. - */ - bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); - mlen++; - } else if (op == ir_txb || op == ir_txl) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), lod); - mlen++; - } else { - unreachable("Should not get here."); - } - - bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c); - mlen++; - } else if (op == ir_tex) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, bld, 1); - } - /* zero the others. */ - for (int i = coord_components; i<3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); - } - /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ - mlen += 3; - } else if (op == ir_txd) { - fs_reg &dPdx = lod; - - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, bld, 1); - } - /* the slots for u and v are always present, but r is optional */ - mlen += MAX2(coord_components, 2); - - /* P = u, v, r - * dPdx = dudx, dvdx, drdx - * dPdy = dudy, dvdy, drdy - * - * 1-arg: Does not exist. - * - * 2-arg: dudx dvdx dudy dvdy - * dPdx.x dPdx.y dPdy.x dPdy.y - * m4 m5 m6 m7 - * - * 3-arg: dudx dvdx drdx dudy dvdy drdy - * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z - * m5 m6 m7 m8 m9 m10 - */ - for (int i = 0; i < grad_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx); - dPdx = offset(dPdx, bld, 1); - } - mlen += MAX2(grad_components, 2); - - for (int i = 0; i < grad_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy); - dPdy = offset(dPdy, bld, 1); - } - mlen += MAX2(grad_components, 2); - } else if (op == ir_txs) { - /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ - simd16 = true; - bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); - mlen += 2; - } else { - /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod - * instructions. We'll need to do SIMD16 here. - */ - simd16 = true; - assert(op == ir_txb || op == ir_txl || op == ir_txf); - - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), - coordinate); - coordinate = offset(coordinate, bld, 1); - } - - /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to - * be necessary for TXF (ld), but seems wise to do for all messages. - */ - for (int i = coord_components; i < 3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); - } - - /* lod/bias appears after u/v/r. */ - mlen += 6; - - bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod); - mlen++; - - /* The unused upper half. */ - mlen++; - } - - if (simd16) { - /* Now, since we're doing simd16, the return is 2 interleaved - * vec4s where the odd-indexed ones are junk. We'll need to move - * this weirdness around to the expected layout. - */ - orig_dst = dst; - dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type); - } - - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - default: - unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = base_mrf; - inst->mlen = mlen; - inst->header_size = 1; - inst->regs_written = simd16 ? 8 : 4; - - if (simd16) { - for (int i = 0; i < 4; i++) { - bld.MOV(orig_dst, dst); - orig_dst = offset(orig_dst, bld, 1); - dst = offset(dst, bld, 2); - } - } - - return inst; -} - -fs_inst * -fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, fs_reg lod, - uint32_t sampler) -{ - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F); - bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs; - - if (has_lod && shadow_c.file != BAD_FILE) - no16("TXB and TXL with shadow comparison unsupported in SIMD16."); - - if (op == ir_txd) - no16("textureGrad unsupported in SIMD16."); - - /* Copy the coordinates. */ - for (int i = 0; i < vector_elements; i++) { - bld.MOV(retype(offset(message, bld, i), coordinate.type), coordinate); - coordinate = offset(coordinate, bld, 1); - } - - fs_reg msg_end = offset(message, bld, vector_elements); - - /* Messages other than sample and ld require all three components */ - if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) { - for (int i = vector_elements; i < 3; i++) { - bld.MOV(offset(message, bld, i), fs_reg(0.0f)); - } - msg_end = offset(message, bld, 3); - } - - if (has_lod) { - fs_reg msg_lod = retype(msg_end, op == ir_txf ? - BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, bld, 1); - } - - if (shadow_c.file != BAD_FILE) { - fs_reg msg_ref = offset(message, bld, 3 + has_lod); - bld.MOV(msg_ref, shadow_c); - msg_end = offset(msg_ref, bld, 1); - } - - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - default: unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = message.reg - 1; - inst->mlen = msg_end.reg - inst->base_mrf; - inst->header_size = 1; - inst->regs_written = 8; - - return inst; -} - -/* gen5's sampler has slots for u, v, r, array index, then optional - * parameters like shadow comparitor or LOD bias. If optional - * parameters aren't present, those base slots are optional and don't - * need to be included in the message. - * - * We don't fill in the unnecessary slots regardless, which may look - * surprising in the disassembly. - */ -fs_inst * -fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, uint32_t sampler, - bool has_offset) -{ - int reg_width = dispatch_width / 8; - unsigned header_size = 0; - - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F); - fs_reg msg_coords = message; - - if (has_offset) { - /* The offsets set up by the ir_texture visitor are in the - * m1 header, so we can't go headerless. - */ - header_size = 1; - message.reg--; - } - - for (int i = 0; i < vector_elements; i++) { - bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate); - coordinate = offset(coordinate, bld, 1); - } - fs_reg msg_end = offset(msg_coords, bld, vector_elements); - fs_reg msg_lod = offset(msg_coords, bld, 4); - - if (shadow_c.file != BAD_FILE) { - fs_reg msg_shadow = msg_lod; - bld.MOV(msg_shadow, shadow_c); - msg_lod = offset(msg_shadow, bld, 1); - msg_end = msg_lod; - } - - enum opcode opcode; - switch (op) { - case ir_tex: - opcode = SHADER_OPCODE_TEX; - break; - case ir_txb: - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, bld, 1); - - opcode = FS_OPCODE_TXB; - break; - case ir_txl: - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, bld, 1); - - opcode = SHADER_OPCODE_TXL; - break; - case ir_txd: { - /** - * P = u, v, r - * dPdx = dudx, dvdx, drdx - * dPdy = dudy, dvdy, drdy - * - * Load up these values: - * - dudx dudy dvdx dvdy drdx drdy - * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z - */ - msg_end = msg_lod; - for (int i = 0; i < grad_components; i++) { - bld.MOV(msg_end, lod); - lod = offset(lod, bld, 1); - msg_end = offset(msg_end, bld, 1); - - bld.MOV(msg_end, lod2); - lod2 = offset(lod2, bld, 1); - msg_end = offset(msg_end, bld, 1); - } - - opcode = SHADER_OPCODE_TXD; - break; - } - case ir_txs: - msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, bld, 1); - - opcode = SHADER_OPCODE_TXS; - break; - case ir_query_levels: - msg_lod = msg_end; - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); - msg_end = offset(msg_lod, bld, 1); - - opcode = SHADER_OPCODE_TXS; - break; - case ir_txf: - msg_lod = offset(msg_coords, bld, 3); - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); - msg_end = offset(msg_lod, bld, 1); - - opcode = SHADER_OPCODE_TXF; - break; - case ir_txf_ms: - msg_lod = offset(msg_coords, bld, 3); - /* lod */ - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); - /* sample index */ - bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index); - msg_end = offset(msg_lod, bld, 2); - - opcode = SHADER_OPCODE_TXF_CMS; - break; - case ir_lod: - opcode = SHADER_OPCODE_LOD; - break; - case ir_tg4: - opcode = SHADER_OPCODE_TG4; - break; - default: - unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = message.reg; - inst->mlen = msg_end.reg - message.reg; - inst->header_size = header_size; - inst->regs_written = 4 * reg_width; - - if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { - fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) - " disallowed by hardware\n"); - } - - return inst; -} - -static bool -is_high_sampler(const struct brw_device_info *devinfo, fs_reg sampler) -{ - if (devinfo->gen < 8 && !devinfo->is_haswell) - return false; - - return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; -} - -fs_inst * -fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_c, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, fs_reg mcs, fs_reg sampler, - fs_reg offset_value) -{ - int reg_width = dispatch_width / 8; - unsigned header_size = 0; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); - for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { - sources[i] = vgrf(glsl_type::float_type); - } - int length = 0; - - if (op == ir_tg4 || offset_value.file != BAD_FILE || - is_high_sampler(devinfo, sampler)) { - /* For general texture offsets (no txf workaround), we need a header to - * put them in. Note that we're only reserving space for it in the - * message payload as it will be initialized implicitly by the - * generator. - * - * * ir4_tg4 needs to place its channel select in the header, - * for interaction with ARB_texture_swizzle - * - * The sampler index is only 4-bits, so for larger sampler numbers we - * need to offset the Sampler State Pointer in the header. - */ - header_size = 1; - sources[0] = fs_reg(); - length++; - } - - if (shadow_c.file != BAD_FILE) { - bld.MOV(sources[length], shadow_c); - length++; - } - - bool has_nonconstant_offset = - offset_value.file != BAD_FILE && offset_value.file != IMM; - bool coordinate_done = false; - - /* The sampler can only meaningfully compute LOD for fragment shader - * messages. For all other stages, we change the opcode to ir_txl and - * hardcode the LOD to 0. - */ - if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) { - op = ir_txl; - lod = fs_reg(0.0f); - } - - /* Set up the LOD info */ - switch (op) { - case ir_tex: - case ir_lod: - break; - case ir_txb: - bld.MOV(sources[length], lod); - length++; - break; - case ir_txl: - bld.MOV(sources[length], lod); - length++; - break; - case ir_txd: { - no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); - - /* Load dPdx and the coordinate together: - * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z - */ - for (int i = 0; i < coord_components; i++) { - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - - /* For cube map array, the coordinate is (u,v,r,ai) but there are - * only derivatives for (u, v, r). - */ - if (i < grad_components) { - bld.MOV(sources[length], lod); - lod = offset(lod, bld, 1); - length++; - - bld.MOV(sources[length], lod2); - lod2 = offset(lod2, bld, 1); - length++; - } - } - - coordinate_done = true; - break; - } - case ir_txs: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); - length++; - break; - case ir_query_levels: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)); - length++; - break; - case ir_txf: - /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. - * On Gen9 they are u, v, lod, r - */ - - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - - if (devinfo->gen >= 9) { - if (coord_components >= 2) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, bld, 1); - } - length++; - } - - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); - length++; - - for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - } - - coordinate_done = true; - break; - case ir_txf_ms: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); - length++; - - /* data from the multisample control surface */ - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); - length++; - - /* there is no offsetting for this message; just copy in the integer - * texture coordinates - */ - for (int i = 0; i < coord_components; i++) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - } - - coordinate_done = true; - break; - case ir_tg4: - if (has_nonconstant_offset) { - if (shadow_c.file != BAD_FILE) - no16("Gen7 does not support gather4_po_c in SIMD16 mode."); - - /* More crazy intermixing */ - for (int i = 0; i < 2; i++) { /* u, v */ - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - } - - for (int i = 0; i < 2; i++) { /* offu, offv */ - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value); - offset_value = offset(offset_value, bld, 1); - length++; - } - - if (coord_components == 3) { /* r if present */ - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - } - - coordinate_done = true; - } - break; - } - - /* Set up the coordinate (except for cases where it was done above) */ - if (!coordinate_done) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, bld, 1); - length++; - } - } - - int mlen; - if (reg_width == 2) - mlen = length * reg_width - header_size; - else - mlen = length * reg_width; - - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_F); - bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); - - /* Generate the SEND */ - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; - case ir_lod: opcode = SHADER_OPCODE_LOD; break; - case ir_tg4: - if (has_nonconstant_offset) - opcode = SHADER_OPCODE_TG4_OFFSET; - else - opcode = SHADER_OPCODE_TG4; - break; - default: - unreachable("not reached"); - } - fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler); - inst->base_mrf = -1; - inst->mlen = mlen; - inst->header_size = header_size; - inst->regs_written = 4 * reg_width; - - if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { - fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) - " disallowed by hardware\n"); - } - - return inst; -} - fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit) |