summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_context.h 14
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_draw.c 47
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_gmem.c 7
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_gmem.h 1
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c 115
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.h 6
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_state.c 32
7 files changed, 216 insertions, 6 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 7e940bca496..698af4efd19 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -255,6 +255,7 @@ struct fd_context {
/* per shader-stage dirty status: */
enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES];
+ void *compute;
struct pipe_blend_state *blend;
struct pipe_rasterizer_state *rasterizer;
struct pipe_depth_stencil_alpha_state *zsa;
@@ -299,6 +300,9 @@ struct fd_context {
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
+ /* compute: */
+ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
+
/* constant emit: (note currently not used/needed for a2xx) */
void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
@@ -376,8 +380,16 @@ static inline void
fd_context_all_clean(struct fd_context *ctx)
{
ctx->dirty = 0;
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
+ /* don't mark compute state as clean, since it is not emitted
+ * during a normal draw call.  It is safe for the callers of
+ * _all_dirty() to mark compute state dirty as well, but the
+ * inverse is not true.
+ */
+ if (i == PIPE_SHADER_COMPUTE)
+ continue;
ctx->dirty_shader[i] = 0;
+ }
}
static inline struct pipe_scissor_state *
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 6691f65db97..4e7827ddd02 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -408,6 +408,49 @@ fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
buffers, depth, stencil, x, y, w, h);
}
+static void
+fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_batch *batch, *save_batch = NULL;
+	unsigned i;
+
+	/* Run the grid in a freshly allocated batch that is flushed right away,
+	 * restoring the previously bound batch afterwards.  TODO maybe avoid
+	 * the per-launch alloc+flush: use a special bogus (ie. won't match any
+	 * fb state) key in the batch-cache for compute shaders, and rely on
+	 * the dependency tracking to tell us when the batch must be flushed?
+	 */
+	batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx);
+	fd_batch_reference(&save_batch, ctx->batch);   /* stash the current batch */
+	fd_batch_reference(&ctx->batch, batch);        /* make the compute batch current */
+
+	mtx_lock(&ctx->screen->lock);
+
+	/* Mark SSBOs as being written.. we don't actually know which ones are
+	 * read vs written, so just assume the worst
+	 */
+	foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask)
+		resource_written(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer);
+
+	/* UBOs are only read */
+	foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
+		resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
+
+	/* Mark textures as being read */
+	foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
+		resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
+
+	mtx_unlock(&ctx->screen->lock);
+
+	ctx->launch_grid(ctx, info);   /* per-generation backend emits the dispatch */
+
+	fd_gmem_flush_compute(batch);
+
+	fd_batch_reference(&ctx->batch, save_batch);   /* restore the previous batch */
+	fd_batch_reference(&save_batch, NULL);         /* drop the stash reference */
+}
+
void
fd_draw_init(struct pipe_context *pctx)
{
@@ -415,4 +458,8 @@ fd_draw_init(struct pipe_context *pctx)
pctx->clear = fd_clear;
pctx->clear_render_target = fd_clear_render_target;
pctx->clear_depth_stencil = fd_clear_depth_stencil;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->launch_grid = fd_launch_grid;
+ }
}
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index ded23219dc2..23be0472b25 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -439,6 +439,13 @@ fd_gmem_render_noop(struct fd_batch *batch)
flush_ring(batch);
}
+void
+fd_gmem_flush_compute(struct fd_batch *batch)
+{
+ render_sysmem(batch); /* NOTE(review): presumably the non-tiled (sysmem) emit path -- confirm */
+ flush_ring(batch); /* same closing step as fd_gmem_render_noop() */
+}
+
/* tile needs restore if it isn't completely contained within the
* cleared scissor:
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
index 6598ea9cc8b..42a8dfa8047 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -63,6 +63,7 @@ struct fd_batch;
void fd_gmem_render_tiles(struct fd_batch *batch);
void fd_gmem_render_noop(struct fd_batch *batch);
+void fd_gmem_flush_compute(struct fd_batch *batch);
bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
uint32_t buffers);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 15293b1b3be..052565dcbdc 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -189,13 +189,15 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_USER_CONSTANT_BUFFERS:
return is_a4xx(screen) ? 0 : 1;
+ case PIPE_CAP_COMPUTE:
+ return has_compute(screen);
+
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_PCI_GROUP:
case PIPE_CAP_PCI_BUS:
@@ -454,6 +456,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_VERTEX:
break;
case PIPE_SHADER_COMPUTE:
+ if (has_compute(screen))
+ break;
+ return 0;
case PIPE_SHADER_GEOMETRY:
/* maye we could emulate.. */
return 0;
@@ -514,13 +519,30 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
- if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_VERTEX:
+ if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
+ return PIPE_SHADER_IR_NIR;
+ return PIPE_SHADER_IR_TGSI;
+ default:
+ /* tgsi_to_nir doesn't really support much beyond FS/VS: */
+ debug_assert(is_ir3(screen));
return PIPE_SHADER_IR_NIR;
- return PIPE_SHADER_IR_TGSI;
+ }
+ break;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ if (is_ir3(screen)) {
+ return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
+ } else {
+ return (1 << PIPE_SHADER_IR_TGSI);
+ }
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
if (is_a5xx(screen)) {
/* a5xx (and a4xx for that matter) has one state-block
@@ -552,14 +574,96 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
}
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
- case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
- case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ /* probably should be same as MAX_SHADER_BUFFERS but not implemented yet */
return 0;
}
debug_printf("unknown shader param %d\n", param);
return 0;
}
+/* Stores the cap's value via 'ret' (if non-NULL) and returns its size in bytes,
+ * or 0 when unhandled.  TODO depending on how much the limits differ for
+ * a3xx/a4xx, maybe move this into per-generation backend? */
+static int
+fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
+		enum pipe_compute_cap param, void *ret)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	const char *ir = "ir3";
+
+	if (!has_compute(screen))
+		return 0;
+
+	switch (param) {
+	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+		if (ret) {
+			uint32_t *address_bits = ret;
+			address_bits[0] = 32;
+
+			if (is_a5xx(screen))
+				address_bits[0] = 64;
+		}
+		return 1 * sizeof(uint32_t);
+
+	case PIPE_COMPUTE_CAP_IR_TARGET:
+		if (ret)
+			sprintf(ret, "%s", ir);   /* never pass 'ir' itself as the format */
+		return (strlen(ir) + 1) * sizeof(char);   /* +1: sprintf also writes the NUL */
+
+	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+		if (ret) {
+			uint64_t *grid_dimension = ret;
+			grid_dimension[0] = 3;
+		}
+		return 1 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+		if (ret) {
+			uint64_t *grid_size = ret;
+			grid_size[0] = 65535;
+			grid_size[1] = 65535;
+			grid_size[2] = 65535;
+		}
+		return 3 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+		if (ret) {
+			uint64_t *block_size = ret;
+			block_size[0] = 1024;
+			block_size[1] = 1024;
+			block_size[2] = 64;
+		}
+		return 3 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+		if (ret) {
+			uint64_t *max_threads_per_block = ret;
+			*max_threads_per_block = 1024;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+		break;   /* not reported yet: falls out to 'return 0' */
+	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+		if (ret) {
+			uint64_t *max = ret;
+			*max = 32768;
+		}
+		return sizeof(uint64_t);
+	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+		break;   /* not reported yet: falls out to 'return 0' */
+	}
+
+	return 0;
+}
+
static const void *
fd_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir, unsigned shader)
@@ -752,6 +856,7 @@ fd_screen_create(struct fd_device *dev)
pscreen->get_param = fd_screen_get_param;
pscreen->get_paramf = fd_screen_get_paramf;
pscreen->get_shader_param = fd_screen_get_shader_param;
+ pscreen->get_compute_param = fd_get_compute_param;
pscreen->get_compiler_options = fd_get_compiler_options;
fd_resource_screen_init(pscreen);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index dac7224f3af..83c044988f1 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -128,4 +128,10 @@ is_ir3(struct fd_screen *screen)
return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen);
}
+static inline bool
+has_compute(struct fd_screen *screen)
+{
+ return false; /* compute is reported as unsupported for every screen ('screen' is not consulted) */
+}
+
#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index bc9fe4ab73e..75bf1b142bd 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -451,6 +451,32 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
ctx->dirty |= FD_DIRTY_STREAMOUT;
}
+static void
+fd_bind_compute_state(struct pipe_context *pctx, void *state)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->compute = state; /* remember the opaque compute state object that is now bound */
+ ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG; /* flag the compute program as dirty */
+}
+
+static void
+fd_set_compute_resources(struct pipe_context *pctx,
+ unsigned start, unsigned count, struct pipe_surface **prscs)
+{
+ // TODO: not implemented yet, all arguments are currently ignored
+}
+
+static void
+fd_set_global_binding(struct pipe_context *pctx,
+ unsigned first, unsigned count, struct pipe_resource **prscs,
+ uint32_t **handles)
+{
+ /* TODO only used by clover.. seems to need us to return the actual
+ * gpuaddr of the buffer.. which isn't really exposed to mesa atm.
+ * How is this used?  Until that is resolved this is a no-op.
+ */
+}
+
void
fd_state_init(struct pipe_context *pctx)
{
@@ -484,4 +510,10 @@ fd_state_init(struct pipe_context *pctx)
pctx->create_stream_output_target = fd_create_stream_output_target;
pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
pctx->set_stream_output_targets = fd_set_stream_output_targets;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->bind_compute_state = fd_bind_compute_state;
+ pctx->set_compute_resources = fd_set_compute_resources;
+ pctx->set_global_binding = fd_set_global_binding;
+ }
}