diff options
Diffstat (limited to 'src/gallium/drivers/ilo/shader')
-rw-r--r--  src/gallium/drivers/ilo/shader/ilo_shader_fs.c        | 56
-rw-r--r--  src/gallium/drivers/ilo/shader/ilo_shader_internal.h  |  5
-rw-r--r--  src/gallium/drivers/ilo/shader/ilo_shader_vs.c        | 58
-rw-r--r--  src/gallium/drivers/ilo/shader/toy_tgsi.c             | 11
-rw-r--r--  src/gallium/drivers/ilo/shader/toy_tgsi.h             |  3
5 files changed, 126 insertions, 7 deletions
/*
 * Patch overview: lets the ilo vertex and fragment shader compilers read
 * constant buffer 0 from registers preloaded as a push constant buffer (PCB)
 * instead of always issuing a constant load message.
 *
 * ilo_shader_fs.c
 *   - New fs_lower_opcode_tgsi_const_pcb(): lowers a constant fetch to four
 *     raw (type D) MOVs from a GRF, one per transposed destination.  It only
 *     does so when variant->use_pcb is set, dim is 0, idx is an immediate and
 *     the computed register is below first_attr_grf; otherwise it emits
 *     nothing and returns false.  Two constants share one register: the
 *     register is first_const_grf + idx / 2 and odd indices use sub-register
 *     offset 16.  The register is computed before idx.file is checked, but
 *     it is only used once the check has passed.
 *   - fs_lower_opcode_tgsi_const_gen6()/_gen7() try that helper first and
 *     return early when it succeeds.
 *   - fs_setup(): num_consts is (const_count + 1) / 2 when use_pcb is set and
 *     no constant is addressed indirectly, and is reset to 0 when it would
 *     exceed 8.  It also records skip_cbuf0_upload (true when the shader
 *     declares no constants or when num_consts is non-zero) and
 *     pcb.cbuf0_size (num_consts * sizeof(float) * 8).
 *
 * ilo_shader_internal.h
 *   - Adds ilo_shader_variant::use_pcb, ilo_shader::skip_cbuf0_upload and
 *     ilo_shader::pcb.cbuf0_size.
 *
 * ilo_shader_vs.c
 *   - Adds vs_compile_context::first_ucp_grf.  Register layout set up by
 *     vs_setup() becomes: constants from first_const_grf (1), user clip
 *     planes from first_ucp_grf, then the VUE from first_vue_grf.
 *   - New vs_lower_opcode_tgsi_const_pcb(): same acceptance test as the
 *     fragment shader helper, bounded by first_ucp_grf, emitting a single MOV
 *     from a TOY_RECT_041 source.
 *   - vs_collect_outputs() now reads clip planes relative to first_ucp_grf.
 *   - vs_setup(): num_consts is reset to 0 when it would exceed 32.
 *     NOTE(review): only num_consts is compared with 32, while the quoted PRM
 *     text limits the sum of all read lengths and the clip plane registers
 *     follow the constants -- confirm whether they count toward that limit.
 *
 * toy_tgsi.c / toy_tgsi.h
 *   - parse_instruction() sets const_indirect when any source register is in
 *     TGSI_FILE_CONSTANT with Register.Indirect set.
 *   - parse_declaration() raises const_count to Range.Last + 1 of each
 *     TGSI_FILE_CONSTANT declaration; TGSI_FILE_NULL keeps falling through to
 *     the cases below it.
 *
 * NOTE(review): line breaks, blank lines and indentation below were rebuilt
 * from the hunk line counts because the page text had its whitespace
 * collapsed; compare the context lines with the tree before applying.
 */
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
index 36a308744c6..48d5721631c 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
@@ -306,6 +306,32 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
    }
 }
 
+static bool
+fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
+                               struct toy_dst dst, int dim,
+                               struct toy_src idx)
+{
+   const int grf = fcc->first_const_grf + idx.val32 / 2;
+   const int grf_subreg = (idx.val32 & 1) * 16;
+   struct toy_src src;
+   struct toy_dst real_dst[4];
+   int i;
+
+   if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
+       grf >= fcc->first_attr_grf)
+      return false;
+
+   src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
+
+   tdst_transpose(dst, real_dst);
+   for (i = 0; i < 4; i++) {
+      /* cast to type D to make sure these are raw moves */
+      tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
+   }
+
+   return true;
+}
+
 static void
 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
                                 struct toy_dst dst, int dim, struct toy_src idx)
@@ -322,6 +348,9 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
    struct toy_dst tmp, real_dst[4];
    int i;
 
+   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
+      return;
+
    /* set message header */
    inst = tc_MOV(tc, header, r0);
    inst->mask_ctrl = BRW_MASK_DISABLE;
@@ -365,6 +394,9 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
    struct toy_dst tmp, real_dst[4];
    int i;
 
+   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
+      return;
+
    /*
     * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
     * changed from OWord Block Read to ld to increase performance in the
@@ -1743,8 +1775,28 @@ fs_setup(struct fs_compile_context *fcc,
    fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
    fs_setup_shader_out(fcc->shader, &fcc->tgsi);
 
-   /* we do not make use of push constant buffers yet */
-   num_consts = 0;
+   if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
+      num_consts = (fcc->tgsi.const_count + 1) / 2;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+       *
+       *     "The sum of all four read length fields (each incremented to
+       *      represent the actual read length) must be less than or equal to
+       *      64"
+       *
+       * Since we are usually under a high register pressure, do not allow
+       * for more than 8.
+       */
+      if (num_consts > 8)
+         num_consts = 0;
+   }
+   else {
+      num_consts = 0;
+   }
+
+   fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
+   fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
 
    fcc->first_const_grf = fs_setup_payloads(fcc);
    fcc->first_attr_grf = fcc->first_const_grf + num_consts;
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 07e8ee2f683..8d4a6a18c6b 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -59,6 +59,8 @@ struct ilo_shader_variant {
       } fs;
    } u;
 
+   bool use_pcb;
+
    int num_sampler_views;
    struct {
       unsigned r:3;
@@ -102,6 +104,8 @@ struct ilo_shader {
       bool has_pos;
    } out;
 
+   bool skip_cbuf0_upload;
+
    bool has_kill;
    bool dispatch_16;
 
@@ -124,6 +128,7 @@ struct ilo_shader {
 
    /* what does the push constant buffer consist of? */
    struct {
+      int cbuf0_size;
       int clip_state_size;
    } pcb;
 
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
index dc166d7cc48..b5b44b57796 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
@@ -47,6 +47,7 @@ struct vs_compile_context {
    int num_grf_per_vrf;
 
    int first_const_grf;
+   int first_ucp_grf;
    int first_vue_grf;
    int first_free_grf;
    int last_free_grf;
@@ -79,6 +80,27 @@ vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
    }
 }
 
+static bool
+vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc,
+                               struct toy_dst dst, int dim,
+                               struct toy_src idx)
+{
+   const int i = idx.val32;
+   const int grf = vcc->first_const_grf + i / 2;
+   const int grf_subreg = (i & 1) * 16;
+   struct toy_src src;
+
+   if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
+       grf >= vcc->first_ucp_grf)
+      return false;
+
+
+   src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041);
+   tc_MOV(&vcc->tc, dst, src);
+
+   return true;
+}
+
 static void
 vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
                                 struct toy_dst dst, int dim,
@@ -94,6 +116,9 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
    struct toy_inst *inst;
    struct toy_src desc;
 
+   if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
+      return;
+
    /* set message header */
    inst = tc_MOV(tc, header, r0);
    inst->mask_ctrl = BRW_MASK_DISABLE;
@@ -121,6 +146,9 @@ vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
       tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
    struct toy_src desc;
 
+   if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
+      return;
+
    /*
     * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
     * changed from OWord Dual Block Read to ld to increase performance in the
@@ -835,7 +863,7 @@ vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
       }
 
       for (j = first_ucp; j <= last_ucp; j++) {
-         const int plane_grf = vcc->first_const_grf + j / 2;
+         const int plane_grf = vcc->first_ucp_grf + j / 2;
          const int plane_subreg = (j & 1) * 16;
          const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
                   plane_grf, plane_subreg), TOY_RECT_041);
@@ -1199,12 +1227,34 @@ vs_setup(struct vs_compile_context *vcc,
    vs_setup_shader_out(vcc->shader, &vcc->tgsi,
          (vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
 
-   /* fit each pair of user clip planes into a register */
-   num_consts = (vcc->variant->u.vs.num_ucps + 1) / 2;
+   if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) {
+      num_consts = (vcc->tgsi.const_count + 1) / 2;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+       *
+       *     "The sum of all four read length fields (each incremented to
+       *      represent the actual read length) must be less than or equal to
+       *      32"
+       */
+      if (num_consts > 32)
+         num_consts = 0;
+   }
+   else {
+      num_consts = 0;
+   }
+
+   vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts);
+   vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
 
    /* r0 is reserved for payload header */
    vcc->first_const_grf = 1;
-   vcc->first_vue_grf = vcc->first_const_grf + num_consts;
+   vcc->first_ucp_grf = vcc->first_const_grf + num_consts;
+
+   /* fit each pair of user clip planes into a register */
+   vcc->first_vue_grf = vcc->first_ucp_grf +
+      (vcc->variant->u.vs.num_ucps + 1) / 2;
+
    vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
    vcc->last_free_grf = 127;
 
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index d5a3f2fe5af..bf1e37ef584 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -2244,6 +2244,12 @@ parse_instruction(struct toy_tgsi *tgsi,
       break;
    }
 
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
+      const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
+      if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
+         tgsi->const_indirect = true;
+   }
+
    /* remember channels written */
    for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
       const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
@@ -2398,8 +2404,11 @@ parse_declaration(struct toy_tgsi *tgsi,
       /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
       assert(!"unexpected immediate declaration");
       break;
-   case TGSI_FILE_NULL:
    case TGSI_FILE_CONSTANT:
+      if (tgsi->const_count <= decl->Range.Last)
+         tgsi->const_count = decl->Range.Last + 1;
+      break;
+   case TGSI_FILE_NULL:
    case TGSI_FILE_TEMPORARY:
    case TGSI_FILE_SAMPLER:
    case TGSI_FILE_PREDICATE:
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h
index 95fc897b7e0..38be9f4f891 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.h
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.h
@@ -91,6 +91,9 @@ struct toy_tgsi {
    } system_values[8];
    int num_system_values;
 
+   int const_count;
+   bool const_indirect;
+
    bool uses_kill;
 };
 
|