diff options
author | Francisco Jerez <[email protected]> | 2016-05-19 21:43:48 -0700 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2016-05-27 23:22:10 -0700 |
commit | 0b4cd91071fdf9802559974aa9fd32ac4bbd7439 (patch) | |
tree | 6630793ba4d9cf2a7877f238a45a5826e875b1a8 /src | |
parent | dabaf4fb9664a431014387cece356d5b64caf9b6 (diff) |
i965/fs: Extend region width calculation to allow arbitrary execution sizes.
Instead of just halving the execution size when the instruction is
compressed hoping that it will give a legal source region width, we
can calculate the maximum legal width value in closed form from the
component size and stride. This makes sure that brw_reg_from_fs_reg()
always returns a valid hardware region even for virtual 32-wide
instructions (e.g. send-like instructions) that would seem to exceed
the hardware region width limit after halving.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 39 |
1 files changed, 23 insertions, 16 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 67c7aa5aa2f..85b24bea107 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -65,27 +65,34 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed) case VGRF: if (reg->stride == 0) { brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0); - } else if (!compressed && - inst->exec_size * reg->stride * type_sz(reg->type) <= 32) { - brw_reg = brw_vecn_reg(inst->exec_size, brw_file_from_reg(reg), - reg->nr, 0); - brw_reg = stride(brw_reg, inst->exec_size * reg->stride, - inst->exec_size, reg->stride); } else { /* From the Haswell PRM: * - * VertStride must be used to cross GRF register boundaries. This - * rule implies that elements within a 'Width' cannot cross GRF - * boundaries. + * "VertStride must be used to cross GRF register boundaries. This + * rule implies that elements within a 'Width' cannot cross GRF + * boundaries." * - * So, for registers that are large enough, we have to split the exec - * size in two and trust the compression state to sort it out. + * The maximum width value that could satisfy this restriction is: */ - assert(inst->exec_size / 2 * reg->stride * type_sz(reg->type) <= 32); - brw_reg = brw_vecn_reg(inst->exec_size / 2, brw_file_from_reg(reg), - reg->nr, 0); - brw_reg = stride(brw_reg, inst->exec_size / 2 * reg->stride, - inst->exec_size / 2, reg->stride); + const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type)); + + /* Because the hardware can only split source regions at a whole + * multiple of width during decompression (i.e. vertically), clamp + * the value obtained above to the physical execution size of a + * single decompressed chunk of the instruction: + */ + const unsigned phys_width = compressed ? inst->exec_size / 2 : + inst->exec_size; + + /* XXX - The equation above is strictly speaking not correct on + * hardware that supports unbalanced GRF writes -- On Gen9+ + * each decompressed chunk of the instruction may have a + * different execution size when the number of components + * written to each destination GRF is not the same. + */ + const unsigned width = MIN2(reg_width, phys_width); + brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); } brw_reg = retype(brw_reg, reg->type); |