summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/v3d
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2019-04-11 12:28:30 -0700
committer: Eric Anholt <[email protected]> 2019-04-12 15:59:31 -0700
commit: 8a2d91e1248e31426ff656c02d3e598f9e117422 (patch)
tree: 612f30a6394ceaeb540fb89c99548b635a916fd7 /src/gallium/drivers/v3d
parent: 11ba8a46e4e72e028b77519b3b90af36f4982f26 (diff)
v3d: Detect the correct number of QPUs and use it to fix the spill size.
We were missing a * 4 even if the particular hardware matched our assumption.
Diffstat (limited to 'src/gallium/drivers/v3d')
-rw-r--r-- src/gallium/drivers/v3d/v3d_program.c 10
-rw-r--r-- src/gallium/drivers/v3d/v3d_screen.c 4
2 files changed, 10 insertions, 4 deletions
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
index 7805b808a01..cdacb5dbb80 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -376,11 +376,13 @@ v3d_get_compiled_shader(struct v3d_context *v3d,
if (shader->prog_data.base->spill_size >
v3d->prog.spill_size_per_thread) {
- /* Max 4 QPUs per slice, 3 slices per core. We only do single
- * core so far. This overallocates memory on smaller cores.
+ /* The TIDX register we use for choosing the area to access
+ * for scratch space is: (core << 6) | (qpu << 2) | thread.
+ * Even at minimum threadcount in a particular shader, that
+ * means we still multiply the QPU count by 4.
*/
- int total_spill_size =
- 4 * 3 * shader->prog_data.base->spill_size;
+ int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 *
+ shader->prog_data.base->spill_size);
v3d_bo_unreference(&v3d->prog.spill_bo);
v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen,
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index b77e3d9060e..6f91e35521a 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -594,6 +594,10 @@ v3d_get_device_info(struct v3d_screen *screen)
screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192;
+ int nslc = (ident1.value >> 4) & 0xf;
+ int qups = (ident1.value >> 8) & 0xf;
+ screen->devinfo.qpu_count = nslc * qups;
+
switch (screen->devinfo.ver) {
case 33:
case 41: