diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 35 |
1 files changed, 34 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a55f024c843..8a8705f153d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2976,7 +2976,40 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, num_records = size / stride; num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); - if (screen->b.chip_class == VI) + /* The NUM_RECORDS field has a different meaning depending on the chip, + * instruction type, STRIDE, and SWIZZLE_ENABLE. + * + * SI-CIK: + * - If STRIDE == 0, it's in byte units. + * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. + * + * VI: + * - For SMEM and STRIDE == 0, it's in byte units. + * - For SMEM and STRIDE != 0, it's in units of STRIDE. + * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. + * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. + * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- + * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when + * using SMEM. This can be done in the shader by clearing STRIDE with s_and. + * That way the same descriptor can be used by both SMEM and VMEM. + * + * GFX9: + * - For SMEM and STRIDE == 0, it's in byte units. + * - For SMEM and STRIDE != 0, it's in units of STRIDE. + * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. + * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. + */ + if (screen->b.chip_class >= GFX9) + /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units + * from STRIDE to bytes. This works around it by setting + * NUM_RECORDS to at least the size of one element, so that + * the first element is readable when IDXEN == 0. + * + * TODO: Fix this in LLVM, but do we need a new intrinsic where + * IDXEN is enforced? + */ + num_records = num_records ? MAX2(num_records, stride) : 0; + else if (screen->b.chip_class == VI) num_records *= stride; state[4] = 0; |