summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Alyssa Rosenzweig <[email protected]> 2019-12-12 11:28:08 -0500
committer Marge Bot <[email protected]> 2019-12-16 19:48:28 +0000
commit 6378797a6d1ce3652d0394beeb1af33af7426ed2 (patch)
tree 7d4c8225c084a3d2adf65888dbee5abb8a87e761 /src
parent a649bbffee579b306a2d12f252d0f3230b2402b5 (diff)
panfrost: Pack invocation_shifts manually instead of a bit field
gcc generates exceptionally bad code for panfrost_pack_work_groups_fused otherwise... although that routine is somehow still hot...

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3067>
Diffstat (limited to 'src')
-rw-r--r-- src/panfrost/encoder/pan_invocation.c 38
-rw-r--r-- src/panfrost/include/panfrost-job.h 17
-rw-r--r-- src/panfrost/pandecode/decode.c 47
3 files changed, 51 insertions, 51 deletions
diff --git a/src/panfrost/encoder/pan_invocation.c b/src/panfrost/encoder/pan_invocation.c
index 8fb1669c9ed..ecde3da4648 100644
--- a/src/panfrost/encoder/pan_invocation.c
+++ b/src/panfrost/encoder/pan_invocation.c
@@ -91,33 +91,38 @@ panfrost_pack_work_groups_compute(
shifts[i + 1] = shifts[i] + bit_count;
}
- /* We're packed, so upload everything */
- out->invocation_count = packed;
- out->size_y_shift = shifts[1];
- out->size_z_shift = shifts[2];
- out->workgroups_x_shift = shifts[3];
- out->workgroups_y_shift = shifts[4];
- out->workgroups_z_shift = shifts[5];
-
/* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
* = 32. This doesn't appear to matter to the hardware, but it's good
* to be bit-identical. */
if (quirk_graphics && (num_z <= 1))
- out->workgroups_z_shift = 32;
+ shifts[5] = 32;
/* Quirk: for graphics, workgroups_x_shift_2 must be at least 2,
* whereas for OpenCL it is simply equal to workgroups_x_shift. For GL
* compute, it seems it might *always* be 2, but this is suspicious and
* needs further investigation. (I'm probably just using GL wrong). */
+ unsigned shift_2 = shifts[3];
+
if (quirk_graphics)
- out->workgroups_x_shift_2 = MAX2(out->workgroups_x_shift, 2);
- else
- out->workgroups_x_shift_2 = out->workgroups_x_shift;
+ shift_2 = MAX2(shift_2, 2);
+
+ /* Pack them in */
+ uint32_t packed_shifts =
+ (shifts[1] << 0) |
+ (shifts[2] << 5) |
+ (shifts[3] << 10) |
+ (shifts[4] << 16) |
+ (shifts[5] << 22) |
+ (shift_2 << 28);
+
+ /* Upload the packed bitfields */
+ out->invocation_count = packed;
+ out->invocation_shifts = packed_shifts;
/* TODO: Compute workgroups_x_shift_3 */
- out->workgroups_x_shift_3 = out->workgroups_x_shift_2;
+ out->workgroups_x_shift_3 = shift_2;
}
/* Packs vertex/tiler descriptors simultaneously */
@@ -136,12 +141,7 @@ panfrost_pack_work_groups_fused(
/* Copy results over */
tiler->invocation_count = vertex->invocation_count;
- tiler->size_y_shift = vertex->size_y_shift;
- tiler->size_z_shift = vertex->size_z_shift;
- tiler->workgroups_x_shift = vertex->workgroups_x_shift;
- tiler->workgroups_x_shift_2 = vertex->workgroups_x_shift_2;
- tiler->workgroups_y_shift = vertex->workgroups_y_shift;
- tiler->workgroups_z_shift = vertex->workgroups_z_shift;
+ tiler->invocation_shifts = vertex->invocation_shifts;
/* Set special fields for each */
vertex->workgroups_x_shift_3 = 5;
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index 15c22d8fa09..7bf23b09836 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -937,13 +937,16 @@ struct mali_vertex_tiler_prefix {
*/
u32 invocation_count;
- u32 size_y_shift : 5;
- u32 size_z_shift : 5;
- u32 workgroups_x_shift : 6;
- u32 workgroups_y_shift : 6;
- u32 workgroups_z_shift : 6;
- /* This is max(workgroups_x_shift, 2) in all the cases I've seen. */
- u32 workgroups_x_shift_2 : 4;
+ /* Bitfield for shifts:
+ *
+ * size_y_shift : 5
+ * size_z_shift : 5
+ * workgroups_x_shift : 6
+ * workgroups_y_shift : 6
+ * workgroups_z_shift : 6
+ * workgroups_x_shift_2 : 4
+ */
+ u32 invocation_shifts;
u32 draw_mode : 4;
u32 unknown_draw : 22;
diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c
index b24ce3f65d1..0b25e354735 100644
--- a/src/panfrost/pandecode/decode.c
+++ b/src/panfrost/pandecode/decode.c
@@ -1674,13 +1674,20 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
* invocation_count for an explanation.
*/
- unsigned size_x = bits(p->invocation_count, 0, p->size_y_shift) + 1;
- unsigned size_y = bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1;
- unsigned size_z = bits(p->invocation_count, p->size_z_shift, p->workgroups_x_shift) + 1;
+ unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
+ unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
+ unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
+ unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
+ unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
+ unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
- unsigned groups_x = bits(p->invocation_count, p->workgroups_x_shift, p->workgroups_y_shift) + 1;
- unsigned groups_y = bits(p->invocation_count, p->workgroups_y_shift, p->workgroups_z_shift) + 1;
- unsigned groups_z = bits(p->invocation_count, p->workgroups_z_shift, 32) + 1;
+ unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
+ unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
+ unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
+
+ unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
+ unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
+ unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
/* Even though we have this decoded, we want to ensure that the
* representation is "unique" so we don't lose anything by printing only
@@ -1695,31 +1702,21 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
bool canonical =
(p->invocation_count == ref.invocation_count) &&
- (p->size_y_shift == ref.size_y_shift) &&
- (p->size_z_shift == ref.size_z_shift) &&
- (p->workgroups_x_shift == ref.workgroups_x_shift) &&
- (p->workgroups_y_shift == ref.workgroups_y_shift) &&
- (p->workgroups_z_shift == ref.workgroups_z_shift) &&
- (p->workgroups_x_shift_2 == ref.workgroups_x_shift_2);
+ (p->invocation_shifts == ref.invocation_shifts);
if (!canonical) {
pandecode_msg("XXX: non-canonical workgroups packing\n");
- pandecode_msg("expected: %X, %d, %d, %d, %d, %d, %d\n",
+ pandecode_msg("expected: %X, %X",
ref.invocation_count,
- ref.size_y_shift,
- ref.size_z_shift,
- ref.workgroups_x_shift,
- ref.workgroups_y_shift,
- ref.workgroups_z_shift,
- ref.workgroups_x_shift_2);
+ ref.invocation_shifts);
pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
- pandecode_prop("size_y_shift = %d", p->size_y_shift);
- pandecode_prop("size_z_shift = %d", p->size_z_shift);
- pandecode_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
- pandecode_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
- pandecode_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
- pandecode_prop("workgroups_x_shift_2 = %d", p->workgroups_x_shift_2);
+ pandecode_prop("size_y_shift = %d", size_y_shift);
+ pandecode_prop("size_z_shift = %d", size_z_shift);
+ pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
+ pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
+ pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
+ pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
}
/* Regardless, print the decode */