diff options
author | Rob Clark <[email protected]> | 2018-02-28 17:33:29 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-03-05 08:05:33 -0500 |
commit | 175d1b437263e5eab0e47378fbeca3dd71002cd0 (patch) | |
tree | 654c6e903192374e9621f4d04f0ac00eccc5837b | |
parent | 9a62536108514434c22ec189ecf273eb09fc7d77 (diff) |
freedreno/ir3: fix fixing-up register footprint
It isn't just vertex shaders that need to fix up the register footprint for
inputs populated before the shader starts.
This problem showed up with compute shaders. If you have (for example)
a localregid sysval, but only the .x component is used, the hardware still
writes the .yz components, which could overflow into other threads,
causing corruption. This showed up in the CL CTS test 'basic/test_basic intmath_int'.
But in theory the same problem could crop up elsewhere.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 43 |
2 files changed, 27 insertions, 18 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 1c8d836a87f..8644bc19218 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -3418,7 +3418,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 				so->varying_in++;
 				so->inputs[i].compmask = (1 << maxcomp) - 1;
 				inloc += maxcomp;
-			} else {
+			} else if (!so->inputs[i].sysval){
 				so->inputs[i].compmask = compmask;
 			}
 			so->inputs[i].regid = regid;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 3d6cae9f80e..555c654374e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -70,26 +70,35 @@ delete_variant(struct ir3_shader_variant *v)
 static void
 fixup_regfootprint(struct ir3_shader_variant *v)
 {
-	if (v->type == SHADER_VERTEX) {
-		unsigned i;
-		for (i = 0; i < v->inputs_count; i++) {
-			/* skip frag inputs fetch via bary.f since their reg's are
-			 * not written by gpu before shader starts (and in fact the
-			 * regid's might not even be valid)
-			 */
-			if (v->inputs[i].bary)
-				continue;
+	unsigned i;
 
-			if (v->inputs[i].compmask) {
-				int32_t regid = (v->inputs[i].regid + 3) >> 2;
-				v->info.max_reg = MAX2(v->info.max_reg, regid);
-			}
-		}
-		for (i = 0; i < v->outputs_count; i++) {
-			int32_t regid = (v->outputs[i].regid + 3) >> 2;
+	for (i = 0; i < v->inputs_count; i++) {
+		/* skip frag inputs fetch via bary.f since their reg's are
+		 * not written by gpu before shader starts (and in fact the
+		 * regid's might not even be valid)
+		 */
+		if (v->inputs[i].bary)
+			continue;
+
+		/* ignore high regs that are global to all threads in a warp
+		 * (they exist by default) (a5xx+)
+		 */
+		if (v->inputs[i].regid >= regid(48,0))
+			continue;
+
+		if (v->inputs[i].compmask) {
+			unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
+			int32_t regid = (v->inputs[i].regid + n) >> 2;
 			v->info.max_reg = MAX2(v->info.max_reg, regid);
 		}
-	} else if (v->type == SHADER_FRAGMENT) {
+	}
+
+	for (i = 0; i < v->outputs_count; i++) {
+		int32_t regid = (v->outputs[i].regid + 3) >> 2;
+		v->info.max_reg = MAX2(v->info.max_reg, regid);
+	}
+
+	if (v->type == SHADER_FRAGMENT) {
 		/* NOTE: not sure how to turn pos_regid off.. but this could
 		 * be, for example, r1.x while max reg used by the shader is
 		 * r0.*, in which case we need to fixup the reg footprint: