summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_draw.c
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2017-02-24 12:57:03 -0800
committer Eric Anholt <[email protected]> 2017-02-24 17:01:29 -0800
commit 292c24ddac5acc35676424f05291c101fcd47b3e (patch)
tree 1cc326dc2c1dd5c8abd664dae0b4e1fcfa4bf373 /src/gallium/drivers/vc4/vc4_draw.c
parent f06915d7b71eb955cc0db4b5555f5c6474926a01 (diff)
vc4: Lazily emit our FS/VS input loads.
This reduces register pressure in both types of shaders, by reordering the input loads from the var->data.driver_location order to the order in which they first appear in the NIR shader. These instructions aren't reorderable at our QIR scheduling level because the FS takes two in lockstep to do an interpolation, and the VS takes multiple read instructions in a row to get a whole vec4-level attribute read.

shader-db impact:
total instructions in shared programs: 76666 -> 76590 (-0.10%)
instructions in affected programs: 42945 -> 42869 (-0.18%)
total max temps in shared programs: 9395 -> 9208 (-1.99%)
max temps in affected programs: 2951 -> 2764 (-6.34%)

Some programs get their max temps hurt, depending on the order that the load_input intrinsics appear, because we end up being unable to copy propagate an older VPM read into its only use.
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_draw.c')
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index ebd080298a4..9f3765db1af 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -170,14 +170,14 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
/* VC4_DIRTY_COMPILED_VS */
cl_u16(&shader_rec, 0); /* vs num uniforms */
cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
- cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_total_size);
cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
/* VC4_DIRTY_COMPILED_CS */
cl_u16(&shader_rec, 0); /* cs num uniforms */
cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
- cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_total_size);
cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */