diff options
author | Eric Anholt <[email protected]> | 2019-04-11 12:28:30 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2019-04-12 15:59:31 -0700 |
commit | 8a2d91e1248e31426ff656c02d3e598f9e117422 (patch) | |
tree | 612f30a6394ceaeb540fb89c99548b635a916fd7 /src/gallium/drivers/v3d | |
parent | 11ba8a46e4e72e028b77519b3b90af36f4982f26 (diff) |
v3d: Detect the correct number of QPUs and use it to fix the spill size.
We were missing a `* 4` factor (TIDX reserves two thread bits per QPU) even
when the particular hardware matched our assumed QPU count.
Diffstat (limited to 'src/gallium/drivers/v3d')
-rw-r--r-- | src/gallium/drivers/v3d/v3d_program.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/v3d/v3d_screen.c | 4 |
2 files changed, 10 insertions, 4 deletions
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 7805b808a01..cdacb5dbb80 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -376,11 +376,13 @@ v3d_get_compiled_shader(struct v3d_context *v3d, if (shader->prog_data.base->spill_size > v3d->prog.spill_size_per_thread) { - /* Max 4 QPUs per slice, 3 slices per core. We only do single - * core so far. This overallocates memory on smaller cores. + /* The TIDX register we use for choosing the area to access + * for scratch space is: (core << 6) | (qpu << 2) | thread. + * Even at minimum threadcount in a particular shader, that + * means we still multiply by qpus by 4. */ - int total_spill_size = - 4 * 3 * shader->prog_data.base->spill_size; + int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * + shader->prog_data.base->spill_size); v3d_bo_unreference(&v3d->prog.spill_bo); v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index b77e3d9060e..6f91e35521a 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -594,6 +594,10 @@ v3d_get_device_info(struct v3d_screen *screen) screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192; + int nslc = (ident1.value >> 4) & 0xf; + int qups = (ident1.value >> 8) & 0xf; + screen->devinfo.qpu_count = nslc * qups; + switch (screen->devinfo.ver) { case 33: case 41: |