summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2018-02-28 17:33:29 -0500
committerRob Clark <[email protected]>2018-03-05 08:05:33 -0500
commit175d1b437263e5eab0e47378fbeca3dd71002cd0 (patch)
tree654c6e903192374e9621f4d04f0ac00eccc5837b
parent9a62536108514434c22ec189ecf273eb09fc7d77 (diff)
freedreno/ir3: fix fixing-up register footprint
It isn't just vertex shaders that need to fixup reg footprint for inputs populated before shader starts. This problem showed up with compute shaders. If you have (for example) a localregid sysval, but only the .x component is used, the hw still writes the .yz components, which could overflow into other threads causing corruption. Showed up in cl cts 'basic/test_basic intmath_int'. But in theory the same problem could crop up elsewhere. Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c2
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c43
2 files changed, 27 insertions, 18 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 1c8d836a87f..8644bc19218 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -3418,7 +3418,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
so->varying_in++;
so->inputs[i].compmask = (1 << maxcomp) - 1;
inloc += maxcomp;
- } else {
+ } else if (!so->inputs[i].sysval){
so->inputs[i].compmask = compmask;
}
so->inputs[i].regid = regid;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 3d6cae9f80e..555c654374e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -70,26 +70,35 @@ delete_variant(struct ir3_shader_variant *v)
static void
fixup_regfootprint(struct ir3_shader_variant *v)
{
- if (v->type == SHADER_VERTEX) {
- unsigned i;
- for (i = 0; i < v->inputs_count; i++) {
- /* skip frag inputs fetch via bary.f since their reg's are
- * not written by gpu before shader starts (and in fact the
- * regid's might not even be valid)
- */
- if (v->inputs[i].bary)
- continue;
+ unsigned i;
- if (v->inputs[i].compmask) {
- int32_t regid = (v->inputs[i].regid + 3) >> 2;
- v->info.max_reg = MAX2(v->info.max_reg, regid);
- }
- }
- for (i = 0; i < v->outputs_count; i++) {
- int32_t regid = (v->outputs[i].regid + 3) >> 2;
+ for (i = 0; i < v->inputs_count; i++) {
+ /* skip frag inputs fetch via bary.f since their reg's are
+ * not written by gpu before shader starts (and in fact the
+ * regid's might not even be valid)
+ */
+ if (v->inputs[i].bary)
+ continue;
+
+ /* ignore high regs that are global to all threads in a warp
+ * (they exist by default) (a5xx+)
+ */
+ if (v->inputs[i].regid >= regid(48,0))
+ continue;
+
+ if (v->inputs[i].compmask) {
+ unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
+ int32_t regid = (v->inputs[i].regid + n) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
- } else if (v->type == SHADER_FRAGMENT) {
+ }
+
+ for (i = 0; i < v->outputs_count; i++) {
+ int32_t regid = (v->outputs[i].regid + 3) >> 2;
+ v->info.max_reg = MAX2(v->info.max_reg, regid);
+ }
+
+ if (v->type == SHADER_FRAGMENT) {
/* NOTE: not sure how to turn pos_regid off.. but this could
* be, for example, r1.x while max reg used by the shader is
* r0.*, in which case we need to fixup the reg footprint: