diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 53 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_ps_state.c | 5 |
9 files changed, 98 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 742bac4e8ef..68a93a690dd 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -335,6 +335,7 @@ struct brw_wm_prog_data { } binding_table; uint8_t computed_depth_mode; + bool computed_stencil; bool early_fragment_tests; bool no_8; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 457f49c9709..6433cffc919 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -919,6 +919,7 @@ enum opcode { FS_OPCODE_BLORP_FB_WRITE, FS_OPCODE_REP_FB_WRITE, + FS_OPCODE_PACK_STENCIL_REF, SHADER_OPCODE_RCP, SHADER_OPCODE_RSQ, SHADER_OPCODE_SQRT, @@ -1330,6 +1331,7 @@ enum fb_write_logical_srcs { FB_WRITE_LOGICAL_SRC_SRC0_ALPHA, FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */ FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */ + FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */ FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */ FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */ }; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f2f598cb569..1c96cf2d85b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3357,6 +3357,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA]; const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH]; const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH]; + const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL]; fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK]; const unsigned components = inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; @@ -3449,6 +3450,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length++; } + if (src_stencil.file != BAD_FILE) { + assert(devinfo->gen >= 9); + assert(bld.dispatch_width() != 16); + + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.exec_all().annotate("FB write OS") + .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], + retype(src_stencil, BRW_REGISTER_TYPE_UB)); + length++; + } + fs_inst *load; if (devinfo->gen >= 7) { /* Send from the GRF */ @@ -5223,6 +5235,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->uses_omask = shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); prog_data->computed_depth_mode = computed_depth_mode(shader); + prog_data->computed_stencil = + shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 50e98becf03..d98769df4dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -337,6 +337,7 @@ public: int *push_constant_loc; fs_reg frag_depth; + fs_reg frag_stencil; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; unsigned output_components[VARYING_SLOT_MAX]; @@ -427,6 +428,8 @@ private: void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst, + struct brw_reg src); void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index bb7e792044f..b016b5684e7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -317,6 +317,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) brw_imm_ud(inst->target)); } + /* Set computes stencil to render target */ + if (prog_data->computed_stencil) { + brw_OR(p, + vec1(retype(payload, BRW_REGISTER_TYPE_UD)), + vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(0x1 << 14)); + } + implied_header = brw_null_reg(); } else { implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -437,6 +445,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_stencil_ref_packing(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(dispatch_width == 8); + assert(devinfo->gen >= 9); + + /* Stencil value updates are provided in 8 slots of 1 byte per slot. + * Presumably, in order to save memory bandwidth, the stencil reference + * values written from the FS need to be packed into 2 dwords (this makes + * sense because the stencil values are limited to 1 byte each and a SIMD8 + * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.) + * + * The spec is confusing here because in the payload definition of MDP_RTW_S8 + * (Message Data Payload for Render Target Writes with Stencil 8b) the + * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of + * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the + * packed values specified above and diagrammed below: + * + * 31 0 + * -------------------------------- + * DW | | + * 2-7 | IGNORED | + * | | + * -------------------------------- + * DW1 | STC | STC | STC | STC | + * | slot7 | slot6 | slot5 | slot4| + * -------------------------------- + * DW0 | STC | STC | STC | STC | + * | slot3 | slot2 | slot1 | slot0| + * -------------------------------- + */ + + src.vstride = BRW_VERTICAL_STRIDE_4; + src.width = BRW_WIDTH_1; + src.hstride = BRW_HORIZONTAL_STRIDE_0; + assert(src.type == BRW_REGISTER_TYPE_UB); + brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src); +} + +void fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) { brw_barrier(p, src); @@ -2182,6 +2231,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_barrier(inst, src[0]); break; + case FS_OPCODE_PACK_STENCIL_REF: + generate_stencil_ref_packing(inst, dst, src[0]); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index e1fb12060c8..acb51c023ad 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -114,6 +114,8 @@ fs_visitor::nir_setup_outputs() } } else if (var->data.location == FRAG_RESULT_DEPTH) { this->frag_depth = reg; + } else if (var->data.location == FRAG_RESULT_STENCIL) { + this->frag_stencil = reg; } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { this->sample_mask = reg; } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9e2b221265c..5c57944ca39 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg dst_depth = (payload.dest_depth_reg ? fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) : fs_reg()); - fs_reg src_depth; + fs_reg src_depth, src_stencil; if (source_depth_to_render_target) { if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) @@ -706,9 +706,12 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) + src_stencil = frag_stencil; + const fs_reg sources[] = { - color0, color1, src0_alpha, src_depth, dst_depth, sample_mask, - fs_reg(components) + color0, color1, src0_alpha, src_depth, dst_depth, src_stencil, + sample_mask, fs_reg(components) }; assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS); fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), @@ -741,6 +744,16 @@ fs_visitor::emit_fb_writes() no16("Missing support for simd16 depth writes on gen6\n"); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + /* From the 'Render Target Write message' section of the docs: + * "Output Stencil is not supported with SIMD16 Render Target Write + * Messages." + * + * FINISHME: split 16 into 2 8s + */ + no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n"); + } + if (do_dual_src) { const fs_builder abld = bld.annotate("FB dual-source write"); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0ac4f2f6e0d..6f6f77ab583 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -295,6 +295,8 @@ brw_instruction_name(enum opcode op) return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; + case FS_OPCODE_PACK_STENCIL_REF: + return "pack_stencil_ref"; case FS_OPCODE_BLORP_FB_WRITE: return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8f0507413a7..10e433b1d59 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw, !brw_color_buffer_write_enabled(brw)) dw1 |= GEN8_PSX_SHADER_HAS_UAV; + if (prog_data->computed_stencil) { + assert(brw->gen >= 9); + dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL; + } + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); |