summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Chia-I Wu <[email protected]> 2013-07-18 05:58:45 +0800
committer: Chia-I Wu <[email protected]> 2013-08-13 16:04:35 +0800
commit: c6e1e0157b9bd9ec416062a21bbd30ca9b69f363 (patch)
tree: ae4d35cb65e3a14bec016b6008ab8d9615d50db1 /src/gallium/drivers
parent: 5e30ffbda6259c2bbd519c5fe092df1db1d0c94d (diff)
ilo: support push constant model in shaders
Source constants from URB constant data when the constant data can fit in the PCB.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/ilo/ilo_shader.c | 22
-rw-r--r-- src/gallium/drivers/ilo/shader/ilo_shader_fs.c | 56
-rw-r--r-- src/gallium/drivers/ilo/shader/ilo_shader_internal.h | 5
-rw-r--r-- src/gallium/drivers/ilo/shader/ilo_shader_vs.c | 58
-rw-r--r-- src/gallium/drivers/ilo/shader/toy_tgsi.c | 11
-rw-r--r-- src/gallium/drivers/ilo/shader/toy_tgsi.h | 3
6 files changed, 143 insertions, 12 deletions
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index b1a11a1391b..0c785201d1a 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -273,6 +273,13 @@ ilo_shader_variant_init(struct ilo_shader_variant *variant,
break;
}
+ /* use PCB unless constant buffer 0 is not in user buffer */
+ if ((ilo->cbuf[info->type].enabled_mask & 0x1) &&
+ !ilo->cbuf[info->type].cso[0].user_buffer)
+ variant->use_pcb = false;
+ else
+ variant->use_pcb = true;
+
num_views = ilo->view[info->type].count;
assert(info->num_samplers <= num_views);
@@ -341,6 +348,8 @@ ilo_shader_variant_guess(struct ilo_shader_variant *variant,
break;
}
+ variant->use_pcb = true;
+
variant->num_sampler_views = info->num_samplers;
for (i = 0; i < info->num_samplers; i++) {
if (info->shadow_samplers & (1 << i)) {
@@ -747,7 +756,8 @@ ilo_shader_create_vs(const struct ilo_dev_info *dev,
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
- ILO_DIRTY_RASTERIZER;
+ ILO_DIRTY_RASTERIZER |
+ ILO_DIRTY_CBUF;
return shader;
}
@@ -764,7 +774,8 @@ ilo_shader_create_gs(const struct ilo_dev_info *dev,
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
ILO_DIRTY_VS |
- ILO_DIRTY_RASTERIZER;
+ ILO_DIRTY_RASTERIZER |
+ ILO_DIRTY_CBUF;
return shader;
}
@@ -781,7 +792,8 @@ ilo_shader_create_fs(const struct ilo_dev_info *dev,
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
ILO_DIRTY_RASTERIZER |
- ILO_DIRTY_FB;
+ ILO_DIRTY_FB |
+ ILO_DIRTY_CBUF;
return shader;
}
@@ -1061,10 +1073,10 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
val = kernel->in.start_grf;
break;
case ILO_KERNEL_SKIP_CBUF0_UPLOAD:
- val = false;
+ val = kernel->skip_cbuf0_upload;
break;
case ILO_KERNEL_PCB_CBUF0_SIZE:
- val = 0;
+ val = kernel->pcb.cbuf0_size;
break;
case ILO_KERNEL_VS_INPUT_INSTANCEID:
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
index 36a308744c6..48d5721631c 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
@@ -306,6 +306,32 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
}
}
+static bool
+fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
+ struct toy_dst dst, int dim,
+ struct toy_src idx)
+{
+ const int grf = fcc->first_const_grf + idx.val32 / 2;
+ const int grf_subreg = (idx.val32 & 1) * 16;
+ struct toy_src src;
+ struct toy_dst real_dst[4];
+ int i;
+
+ if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
+ grf >= fcc->first_attr_grf)
+ return false;
+
+ src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
+
+ tdst_transpose(dst, real_dst);
+ for (i = 0; i < 4; i++) {
+ /* cast to type D to make sure these are raw moves */
+ tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
+ }
+
+ return true;
+}
+
static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
struct toy_dst dst, int dim, struct toy_src idx)
@@ -322,6 +348,9 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
struct toy_dst tmp, real_dst[4];
int i;
+ if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
+ return;
+
/* set message header */
inst = tc_MOV(tc, header, r0);
inst->mask_ctrl = BRW_MASK_DISABLE;
@@ -365,6 +394,9 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
struct toy_dst tmp, real_dst[4];
int i;
+ if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
+ return;
+
/*
* In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
* changed from OWord Block Read to ld to increase performance in the
@@ -1743,8 +1775,28 @@ fs_setup(struct fs_compile_context *fcc,
fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
fs_setup_shader_out(fcc->shader, &fcc->tgsi);
- /* we do not make use of push constant buffers yet */
- num_consts = 0;
+ if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
+ num_consts = (fcc->tgsi.const_count + 1) / 2;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to
+ * 64"
+ *
+ * Since we are usually under a high register pressure, do not allow
+ * for more than 8.
+ */
+ if (num_consts > 8)
+ num_consts = 0;
+ }
+ else {
+ num_consts = 0;
+ }
+
+ fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
+ fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
fcc->first_const_grf = fs_setup_payloads(fcc);
fcc->first_attr_grf = fcc->first_const_grf + num_consts;
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 07e8ee2f683..8d4a6a18c6b 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -59,6 +59,8 @@ struct ilo_shader_variant {
} fs;
} u;
+ bool use_pcb;
+
int num_sampler_views;
struct {
unsigned r:3;
@@ -102,6 +104,8 @@ struct ilo_shader {
bool has_pos;
} out;
+ bool skip_cbuf0_upload;
+
bool has_kill;
bool dispatch_16;
@@ -124,6 +128,7 @@ struct ilo_shader {
/* what does the push constant buffer consist of? */
struct {
+ int cbuf0_size;
int clip_state_size;
} pcb;
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
index dc166d7cc48..b5b44b57796 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
@@ -47,6 +47,7 @@ struct vs_compile_context {
int num_grf_per_vrf;
int first_const_grf;
+ int first_ucp_grf;
int first_vue_grf;
int first_free_grf;
int last_free_grf;
@@ -79,6 +80,27 @@ vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
}
}
+static bool
+vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc,
+ struct toy_dst dst, int dim,
+ struct toy_src idx)
+{
+ const int i = idx.val32;
+ const int grf = vcc->first_const_grf + i / 2;
+ const int grf_subreg = (i & 1) * 16;
+ struct toy_src src;
+
+ if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
+ grf >= vcc->first_ucp_grf)
+ return false;
+
+
+ src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041);
+ tc_MOV(&vcc->tc, dst, src);
+
+ return true;
+}
+
static void
vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
struct toy_dst dst, int dim,
@@ -94,6 +116,9 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
struct toy_inst *inst;
struct toy_src desc;
+ if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
+ return;
+
/* set message header */
inst = tc_MOV(tc, header, r0);
inst->mask_ctrl = BRW_MASK_DISABLE;
@@ -121,6 +146,9 @@ vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
struct toy_src desc;
+ if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
+ return;
+
/*
* In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
* changed from OWord Dual Block Read to ld to increase performance in the
@@ -835,7 +863,7 @@ vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
}
for (j = first_ucp; j <= last_ucp; j++) {
- const int plane_grf = vcc->first_const_grf + j / 2;
+ const int plane_grf = vcc->first_ucp_grf + j / 2;
const int plane_subreg = (j & 1) * 16;
const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
plane_grf, plane_subreg), TOY_RECT_041);
@@ -1199,12 +1227,34 @@ vs_setup(struct vs_compile_context *vcc,
vs_setup_shader_out(vcc->shader, &vcc->tgsi,
(vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
- /* fit each pair of user clip planes into a register */
- num_consts = (vcc->variant->u.vs.num_ucps + 1) / 2;
+ if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) {
+ num_consts = (vcc->tgsi.const_count + 1) / 2;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+ *
+ * "The sum of all four read length fields (each incremented to
+ * represent the actual read length) must be less than or equal to
+ * 32"
+ */
+ if (num_consts > 32)
+ num_consts = 0;
+ }
+ else {
+ num_consts = 0;
+ }
+
+ vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts);
+ vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
/* r0 is reserved for payload header */
vcc->first_const_grf = 1;
- vcc->first_vue_grf = vcc->first_const_grf + num_consts;
+ vcc->first_ucp_grf = vcc->first_const_grf + num_consts;
+
+ /* fit each pair of user clip planes into a register */
+ vcc->first_vue_grf = vcc->first_ucp_grf +
+ (vcc->variant->u.vs.num_ucps + 1) / 2;
+
vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
vcc->last_free_grf = 127;
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index d5a3f2fe5af..bf1e37ef584 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -2244,6 +2244,12 @@ parse_instruction(struct toy_tgsi *tgsi,
break;
}
+ for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
+ if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
+ tgsi->const_indirect = true;
+ }
+
/* remember channels written */
for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
@@ -2398,8 +2404,11 @@ parse_declaration(struct toy_tgsi *tgsi,
/* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
assert(!"unexpected immediate declaration");
break;
- case TGSI_FILE_NULL:
case TGSI_FILE_CONSTANT:
+ if (tgsi->const_count <= decl->Range.Last)
+ tgsi->const_count = decl->Range.Last + 1;
+ break;
+ case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_PREDICATE:
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h
index 95fc897b7e0..38be9f4f891 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.h
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.h
@@ -91,6 +91,9 @@ struct toy_tgsi {
} system_values[8];
int num_system_values;
+ int const_count;
+ bool const_indirect;
+
bool uses_kill;
};