aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/panfrost/pan_compute.c 14
-rw-r--r-- src/gallium/drivers/panfrost/pan_mfbd.c 12
-rw-r--r-- src/gallium/drivers/panfrost/pan_sfbd.c 6
-rw-r--r-- src/panfrost/include/panfrost-job.h 53
-rw-r--r-- src/panfrost/pandecode/decode.c 107
5 files changed, 96 insertions, 96 deletions
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 56bac7a8523..33618cb6997 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -127,20 +127,12 @@ panfrost_launch_grid(struct pipe_context *pipe,
panfrost_emit_for_draw(ctx, false);
- /* Compute jobs have a "compute FBD". It's not a real framebuffer
- * descriptor - there is no framebuffer - but it takes the place of
- * one. As far as I can tell, it's actually the beginning of a
- * single-render-target framebuffer descriptor with almost everything
- * zeroed out.
- */
- struct mali_compute_fbd compute_fbd = {
- .unknown1 = {
- 0, 0x1F, 0, 0, 0, 0, 0, 0
- }
+ struct mali_shared_memory shared = {
+ .shared_workgroup_count = ~0
};
payload->postfix.framebuffer =
- panfrost_upload_transient(batch, &compute_fbd, sizeof(compute_fbd));
+ panfrost_upload_transient(batch, &shared, sizeof(shared));
/* Invoke according to the grid info */
diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c
index fe427c452b0..3e0f5cbd275 100644
--- a/src/gallium/drivers/panfrost/pan_mfbd.c
+++ b/src/gallium/drivers/panfrost/pan_mfbd.c
@@ -380,12 +380,14 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
.rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
.rt_count_2 = 4,
- .unknown2 = 0x1f,
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
-
- .stack_shift = shift,
- .unk0 = 0x1e,
- .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu
+
+ .shared_memory = {
+ .unk0 = 0x1e,
+ .stack_shift = shift,
+ .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu,
+ .shared_workgroup_count = ~0,
+ }
};
return framebuffer;
diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c
index 97d00651076..a4d29adc460 100644
--- a/src/gallium/drivers/panfrost/pan_sfbd.c
+++ b/src/gallium/drivers/panfrost/pan_sfbd.c
@@ -213,12 +213,14 @@ panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
struct mali_single_framebuffer framebuffer = {
.width = MALI_POSITIVE(width),
.height = MALI_POSITIVE(height),
- .unknown2 = 0x1f,
+ .shared_memory = {
+ .shared_workgroup_count = ~0,
+ .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu,
+ },
.format = {
.unk3 = 0x3,
},
.clear_flags = 0x1000,
- .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu,
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
};
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index 1a59f4c77c8..8027abebf34 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -1470,14 +1470,41 @@ struct mali_sfbd_format {
unsigned unk3 : 4;
};
-struct mali_single_framebuffer {
- u32 unknown1;
- u32 unknown2;
+/* Shared structure at the start of framebuffer descriptors, or used bare for
+ * compute jobs, configuring stack and shared memory */
+
+struct mali_shared_memory {
+ u32 stack_shift : 4;
+ u32 unk0 : 28;
+
+ /* Configuration for shared memory for compute shaders.
+ * shared_workgroup_count is logarithmic and may be computed for a
+ * compute shader using shared memory as:
+ *
+ * shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z)), 10)
+ *
+ * For compute shaders that don't use shared memory, or non-compute
+ * shaders, this is set to ~0
+ */
+
+ u32 shared_workgroup_count : 5;
+ u32 shared_unk1 : 3;
+ u32 shared_shift : 4;
+ u32 shared_zero : 20;
+
mali_ptr scratchpad;
- u64 zero1;
- u64 zero0;
+ /* For compute shaders, the RAM backing of workgroup-shared memory. For
+ * fragment shaders on Bifrost, apparently multisampling locations */
+
+ mali_ptr shared_memory;
+ mali_ptr unknown1;
+} __attribute__((packed));
+
+
+struct mali_single_framebuffer {
+ struct mali_shared_memory shared_memory;
struct mali_sfbd_format format;
u32 clear_flags;
@@ -1540,13 +1567,6 @@ struct mali_single_framebuffer {
/* More below this, maybe */
} __attribute__((packed));
-/* On Midgard, this "framebuffer descriptor" is used for the framebuffer field
- * of compute jobs. Superficially resembles a single framebuffer descriptor */
-
-struct mali_compute_fbd {
- u32 unknown1[8];
-} __attribute__((packed));
-
/* Format bits for the render target flags */
#define MALI_MFBD_FORMAT_MSAA (1 << 1)
@@ -1675,15 +1695,8 @@ struct bifrost_fb_extra {
#define MALI_MFBD_EXTRA (1 << 13)
struct bifrost_framebuffer {
- u32 stack_shift : 4;
- u32 unk0 : 28;
+ struct mali_shared_memory shared_memory;
- u32 unknown2; // = 0x1f, same as SFBD
- mali_ptr scratchpad;
-
- /* 0x10 */
- mali_ptr sample_locations;
- mali_ptr unknown1;
/* 0x20 */
u16 width1, height1;
u32 zero3;
diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c
index dc755fcc364..41b843f7a3b 100644
--- a/src/panfrost/pandecode/decode.c
+++ b/src/panfrost/pandecode/decode.c
@@ -666,6 +666,41 @@ pandecode_sfbd_format(struct mali_sfbd_format format)
pandecode_log("},\n");
}
+/* Dump a mali_shared_memory descriptor, flagging shared-memory use when !is_compute */
+static void
+pandecode_shared_memory(const struct mali_shared_memory *desc, bool is_compute)
+{
+        pandecode_prop("stack_shift = 0x%x", desc->stack_shift);
+        if (desc->unk0)
+                pandecode_prop("unk0 = 0x%x", desc->unk0);
+        /* 0x1F is all-ones in the 5-bit field; anything else is printed */
+        if (desc->shared_workgroup_count != 0x1F) {
+                pandecode_prop("shared_workgroup_count = %d", desc->shared_workgroup_count);
+                if (!is_compute)
+                        pandecode_msg("XXX: wrong workgroup count for noncompute\n");
+        }
+
+        if (desc->shared_unk1 || desc->shared_shift) {
+                pandecode_prop("shared_unk1 = %X", desc->shared_unk1);
+                pandecode_prop("shared_shift = %X", desc->shared_shift);
+
+                if (!is_compute)
+                        pandecode_msg("XXX: shared memory configured in noncompute shader\n");
+        }
+
+        if (desc->shared_zero) {
+                pandecode_msg("XXX: shared memory zero tripped\n");
+                pandecode_prop("shared_zero = 0x%" PRIx32, desc->shared_zero);
+        }
+
+        if (desc->shared_memory && !is_compute)
+                pandecode_msg("XXX: shared memory used in noncompute shader\n");
+
+        MEMORY_PROP(desc, scratchpad);
+        MEMORY_PROP(desc, shared_memory);
+        MEMORY_PROP(desc, unknown1);
+}
+
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
@@ -680,8 +715,11 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
pandecode_log("struct mali_single_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
- pandecode_prop("unknown1 = 0x%" PRIx32, s->unknown1);
- pandecode_prop("unknown2 = 0x%" PRIx32, s->unknown2);
+ pandecode_log(".shared_memory = {\n");
+ pandecode_indent++;
+ pandecode_shared_memory(&s->shared_memory, false);
+ pandecode_indent--;
+ pandecode_log("},\n");
pandecode_sfbd_format(s->format);
@@ -748,7 +786,6 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
pandecode_prop("clear_stencil = 0x%x", s->clear_stencil);
}
- MEMORY_PROP(s, scratchpad);
const struct midgard_tiler_descriptor t = s->tiler;
bool has_hierarchy = !(gpu_id == 0x0720 || gpu_id == 0x0820 || gpu_id == 0x0830);
@@ -757,8 +794,6 @@ pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
pandecode_indent--;
pandecode_log("};\n");
- pandecode_prop("zero0 = 0x%" PRIx64, s->zero0);
- pandecode_prop("zero1 = 0x%" PRIx64, s->zero1);
pandecode_prop("zero2 = 0x%" PRIx32, s->zero2);
pandecode_prop("zero4 = 0x%" PRIx32, s->zero4);
pandecode_prop("zero5 = 0x%" PRIx32, s->zero5);
@@ -784,20 +819,13 @@ static void
pandecode_compute_fbd(uint64_t gpu_va, int job_no)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
- const struct mali_compute_fbd *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
+ const struct mali_shared_memory *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
- pandecode_log("struct mali_compute_fbd framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
+ pandecode_log("struct mali_shared_memory shared_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
-
- pandecode_log(".unknown1 = {");
-
- for (int i = 0; i < ARRAY_SIZE(s->unknown1); ++i)
- pandecode_log_cont("%X, ", s->unknown1[i]);
-
- pandecode_log("},\n");
-
+ pandecode_shared_memory(s, true);
pandecode_indent--;
- pandecode_log_cont("},\n");
+ pandecode_log("},\n");
}
/* Extracts the number of components associated with a Mali format */
@@ -1034,45 +1062,14 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput
struct pandecode_fbd info;
- if (fb->sample_locations) {
- /* The blob stores all possible sample locations in a single buffer
- * allocated on startup, and just switches the pointer when switching
- * MSAA state. For now, we just put the data into the cmdstream, but we
- * should do something like what the blob does with a real driver.
- *
- * There seem to be 32 slots for sample locations, followed by another
- * 16. The second 16 is just the center location followed by 15 zeros
- * in all the cases I've identified (maybe shader vs. depth/color
- * samples?).
- */
-
- struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(fb->sample_locations);
-
- const u16 *PANDECODE_PTR_VAR(samples, smem, fb->sample_locations);
-
- pandecode_log("uint16_t sample_locations_%d[] = {\n", job_no);
- pandecode_indent++;
-
- for (int i = 0; i < 32 + 16; i++) {
- pandecode_log("%d, %d,\n", samples[2 * i], samples[2 * i + 1]);
- }
-
- pandecode_indent--;
- pandecode_log("};\n");
- }
-
pandecode_log("struct bifrost_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
- pandecode_prop("stack_shift = 0x%x", fb->stack_shift);
- pandecode_prop("unk0 = 0x%x", fb->unk0);
-
- if (fb->sample_locations)
- pandecode_prop("sample_locations = sample_locations_%d", job_no);
-
- /* Assume that unknown1 was emitted in the last job for
- * now */
- MEMORY_PROP(fb, unknown1);
+ pandecode_log(".shared_memory = {\n");
+ pandecode_indent++;
+ pandecode_shared_memory(&fb->shared_memory, is_compute);
+ pandecode_indent--;
+ pandecode_log("},\n");
info.width = fb->width1 + 1;
info.height = fb->height1 + 1;
@@ -1098,12 +1095,6 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput
if (fb->clear_depth)
pandecode_prop("clear_depth = %f", fb->clear_depth);
- /* TODO: What is this? Let's not blow up.. */
- if (fb->unknown2 != 0x1F)
- pandecode_prop("unknown2 = 0x%x", fb->unknown2);
-
- pandecode_prop("unknown2 = 0x%x", fb->unknown2);
- MEMORY_PROP(fb, scratchpad);
const struct midgard_tiler_descriptor t = fb->tiler;
if (!is_compute)
pandecode_midgard_tiler_descriptor(&t, fb->width1 + 1, fb->height1 + 1, is_fragment, true);