summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/broadcom/compiler/nir_to_vir.c8
-rw-r--r--src/broadcom/compiler/v3d_compiler.h6
-rw-r--r--src/broadcom/compiler/vir.c34
-rw-r--r--src/gallium/drivers/v3d/v3d_context.h11
-rw-r--r--src/gallium/drivers/v3d/v3d_program.c203
-rw-r--r--src/gallium/drivers/v3d/v3d_screen.c102
-rw-r--r--src/gallium/drivers/v3d/v3d_screen.h2
-rw-r--r--src/gallium/drivers/v3d/v3d_uniforms.c15
-rw-r--r--src/gallium/drivers/v3d/v3dx_draw.c4
9 files changed, 302 insertions, 83 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 2b196324754..a7b3adb6c63 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1806,7 +1806,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
- case nir_intrinsic_memory_barrier_shared:
+ case nir_intrinsic_group_memory_barrier:
/* We don't do any instruction scheduling of these NIR
* instructions between each other, so we just need to make
* sure that the TMU operations before the barrier are flushed
@@ -1869,6 +1869,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_uniform_ui(c, 0xffff)));
break;
+ case nir_intrinsic_load_subgroup_id:
+ ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
+ break;
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
@@ -2444,6 +2448,8 @@ v3d_nir_to_vir(struct v3d_compile *c)
case MESA_SHADER_VERTEX:
emit_vert_end(c);
break;
+ case MESA_SHADER_COMPUTE:
+ break;
default:
unreachable("bad stage");
}
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 94247860c68..b2bc40b10fe 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -691,6 +691,12 @@ struct v3d_fs_prog_data {
bool uses_center_w;
};
+struct v3d_compute_prog_data {
+ struct v3d_prog_data base;
+ /* Size in bytes of the workgroup's shared space. */
+ uint32_t shared_size;
+};
+
static inline bool
vir_has_uniform(struct qinst *inst)
{
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index b785b53c62c..6655e5e73bc 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -562,6 +562,21 @@ v3d_lower_nir(struct v3d_compile *c)
}
}
+ /* CS textures may not have return_size reflecting the shadow state. */
+ nir_foreach_variable(var, &c->s->uniforms) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned array_len = MAX2(glsl_get_length(var->type), 1);
+
+ if (!glsl_type_is_sampler(type) ||
+ !glsl_sampler_type_is_shadow(type))
+ continue;
+
+ for (int i = 0; i < array_len; i++) {
+ tex_options.lower_tex_packing[var->data.binding + i] =
+ nir_lower_tex_packing_16;
+ }
+ }
+
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
NIR_PASS_V(c->s, nir_lower_system_values);
}
@@ -670,6 +685,13 @@ v3d_fs_set_prog_data(struct v3d_compile *c,
}
static void
+v3d_cs_set_prog_data(struct v3d_compile *c,
+ struct v3d_compute_prog_data *prog_data)
+{
+ prog_data->shared_size = c->s->info.cs.shared_size;
+}
+
+static void
v3d_set_prog_data(struct v3d_compile *c,
struct v3d_prog_data *prog_data)
{
@@ -679,7 +701,9 @@ v3d_set_prog_data(struct v3d_compile *c,
v3d_set_prog_data_uniforms(c, prog_data);
- if (c->s->info.stage == MESA_SHADER_VERTEX) {
+ if (c->s->info.stage == MESA_SHADER_COMPUTE) {
+ v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
+ } else if (c->s->info.stage == MESA_SHADER_VERTEX) {
v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
} else {
assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
@@ -865,13 +889,17 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
c->fs_key = (struct v3d_fs_key *)key;
prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
break;
+ case MESA_SHADER_COMPUTE:
+ prog_data = rzalloc_size(NULL,
+ sizeof(struct v3d_compute_prog_data));
+ break;
default:
unreachable("unsupported shader stage");
}
if (c->s->info.stage == MESA_SHADER_VERTEX) {
v3d_nir_lower_vs_early(c);
- } else {
+ } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
v3d_nir_lower_fs_early(c);
}
@@ -880,7 +908,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
if (c->s->info.stage == MESA_SHADER_VERTEX) {
v3d_nir_lower_vs_late(c);
- } else {
+ } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
v3d_nir_lower_fs_late(c);
}
diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h
index 3b39d18145d..225ebe2f5ab 100644
--- a/src/gallium/drivers/v3d/v3d_context.h
+++ b/src/gallium/drivers/v3d/v3d_context.h
@@ -186,6 +186,7 @@ struct v3d_compiled_shader {
struct v3d_prog_data *base;
struct v3d_vs_prog_data *vs;
struct v3d_fs_prog_data *fs;
+ struct v3d_compute_prog_data *compute;
} prog_data;
/**
@@ -197,8 +198,10 @@ struct v3d_compiled_shader {
};
struct v3d_program_stateobj {
- struct v3d_uncompiled_shader *bind_vs, *bind_fs;
- struct v3d_compiled_shader *cs, *vs, *fs;
+ struct v3d_uncompiled_shader *bind_vs, *bind_fs, *bind_compute;
+ struct v3d_compiled_shader *cs, *vs, *fs, *compute;
+
+ struct hash_table *cache[MESA_SHADER_STAGES];
struct v3d_bo *spill_bo;
int spill_size_per_thread;
@@ -414,7 +417,6 @@ struct v3d_context {
struct primconvert_context *primconvert;
- struct hash_table *fs_cache, *vs_cache;
uint32_t next_uncompiled_program_id;
uint64_t next_compiled_program_id;
@@ -446,6 +448,8 @@ struct v3d_context {
struct v3d_depth_stencil_alpha_state *zsa;
struct v3d_program_stateobj prog;
+ uint32_t compute_num_workgroups[3];
+ struct v3d_bo *compute_shared_memory;
struct v3d_vertex_stateobj *vtx;
@@ -584,6 +588,7 @@ void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
struct pipe_resource *prsc);
void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
+void v3d_update_compiled_cs(struct v3d_context *v3d);
bool v3d_rt_format_supported(const struct v3d_device_info *devinfo,
enum pipe_format f);
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
index e3e491e9fd7..7805b808a01 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -38,7 +38,8 @@
#include "broadcom/cle/v3d_packet_v33_pack.h"
static struct v3d_compiled_shader *
-v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key);
+v3d_get_compiled_shader(struct v3d_context *v3d,
+ struct v3d_key *key, size_t key_size);
static void
v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
struct v3d_key *key);
@@ -200,7 +201,7 @@ v3d_shader_precompile(struct v3d_context *v3d,
}
v3d_setup_shared_precompile_key(so, &key.base);
- v3d_get_compiled_shader(v3d, &key.base);
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
} else {
struct v3d_vs_key key = {
.base.shader_state = so,
@@ -223,7 +224,7 @@ v3d_shader_precompile(struct v3d_context *v3d,
}
}
- v3d_get_compiled_shader(v3d, &key.base);
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
/* Compile VS bin shader: only position (XXX: include TF) */
key.is_coord = true;
@@ -233,13 +234,13 @@ v3d_shader_precompile(struct v3d_context *v3d,
v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
i);
}
- v3d_get_compiled_shader(v3d, &key.base);
+ v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
}
}
static void *
-v3d_shader_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+v3d_uncompiled_shader_create(struct pipe_context *pctx,
+ enum pipe_shader_ir type, void *ir)
{
struct v3d_context *v3d = v3d_context(pctx);
struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader);
@@ -250,21 +251,21 @@ v3d_shader_state_create(struct pipe_context *pctx,
nir_shader *s;
- if (cso->type == PIPE_SHADER_IR_NIR) {
+ if (type == PIPE_SHADER_IR_NIR) {
/* The backend takes ownership of the NIR shader on state
* creation.
*/
- s = cso->ir.nir;
+ s = ir;
} else {
- assert(cso->type == PIPE_SHADER_IR_TGSI);
+ assert(type == PIPE_SHADER_IR_TGSI);
if (V3D_DEBUG & V3D_DEBUG_TGSI) {
fprintf(stderr, "prog %d TGSI:\n",
so->program_id);
- tgsi_dump(cso->tokens, 0);
+ tgsi_dump(ir, 0);
fprintf(stderr, "\n");
}
- s = tgsi_to_nir(cso->tokens, pctx->screen);
+ s = tgsi_to_nir(ir, pctx->screen);
}
nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform;
@@ -289,8 +290,6 @@ v3d_shader_state_create(struct pipe_context *pctx,
so->base.type = PIPE_SHADER_IR_NIR;
so->base.ir.nir = s;
- v3d_set_transform_feedback_outputs(so, &cso->stream_output);
-
if (V3D_DEBUG & (V3D_DEBUG_NIR |
v3d_debug_flag_for_shader_stage(s->info.stage))) {
fprintf(stderr, "%s prog %d NIR:\n",
@@ -314,22 +313,31 @@ v3d_shader_debug_output(const char *message, void *data)
pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message);
}
-static struct v3d_compiled_shader *
-v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key)
+static void *
+v3d_shader_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
+{
+ struct v3d_uncompiled_shader *so =
+ v3d_uncompiled_shader_create(pctx,
+ cso->type,
+ (cso->type == PIPE_SHADER_IR_TGSI ?
+ (void *)cso->tokens :
+ cso->ir.nir));
+
+ v3d_set_transform_feedback_outputs(so, &cso->stream_output);
+
+ return so;
+}
+
+struct v3d_compiled_shader *
+v3d_get_compiled_shader(struct v3d_context *v3d,
+ struct v3d_key *key,
+ size_t key_size)
{
struct v3d_uncompiled_shader *shader_state = key->shader_state;
nir_shader *s = shader_state->base.ir.nir;
- struct hash_table *ht;
- uint32_t key_size;
- if (s->info.stage == MESA_SHADER_FRAGMENT) {
- ht = v3d->fs_cache;
- key_size = sizeof(struct v3d_fs_key);
- } else {
- ht = v3d->vs_cache;
- key_size = sizeof(struct v3d_vs_key);
- }
-
+ struct hash_table *ht = v3d->prog.cache[s->info.stage];
struct hash_entry *entry = _mesa_hash_table_search(ht, key);
if (entry)
return entry->data;
@@ -359,10 +367,12 @@ v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key)
free(qpu_insts);
- struct v3d_key *dup_key;
- dup_key = ralloc_size(shader, key_size);
- memcpy(dup_key, key, key_size);
- _mesa_hash_table_insert(ht, dup_key, shader);
+ if (ht) {
+ struct v3d_key *dup_key;
+ dup_key = ralloc_size(shader, key_size);
+ memcpy(dup_key, key, key_size);
+ _mesa_hash_table_insert(ht, dup_key, shader);
+ }
if (shader->prog_data.base->spill_size >
v3d->prog.spill_size_per_thread) {
@@ -446,8 +456,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP;
}
}
-
- key->ucp_enables = v3d->rasterizer->base.clip_plane_enable;
}
static void
@@ -489,6 +497,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
memset(key, 0, sizeof(*key));
v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]);
key->base.shader_state = v3d->prog.bind_fs;
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
key->is_points = (prim_mode == PIPE_PRIM_POINTS);
key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
prim_mode <= PIPE_PRIM_LINE_STRIP);
@@ -554,7 +563,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
key->shade_model_flat = v3d->rasterizer->base.flatshade;
struct v3d_compiled_shader *old_fs = v3d->prog.fs;
- v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base);
+ v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (v3d->prog.fs == old_fs)
return;
@@ -602,6 +611,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
memset(key, 0, sizeof(*key));
v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
key->base.shader_state = v3d->prog.bind_vs;
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs;
STATIC_ASSERT(sizeof(key->fs_inputs) ==
sizeof(v3d->prog.fs->prog_data.fs->input_slots));
@@ -614,7 +624,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
v3d->rasterizer->base.point_size_per_vertex);
struct v3d_compiled_shader *vs =
- v3d_get_compiled_shader(v3d, &key->base);
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (vs != v3d->prog.vs) {
v3d->prog.vs = vs;
v3d->dirty |= VC5_DIRTY_COMPILED_VS;
@@ -634,7 +644,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
key->num_fs_inputs = shader_state->num_tf_outputs;
struct v3d_compiled_shader *cs =
- v3d_get_compiled_shader(v3d, &key->base);
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (cs != v3d->prog.cs) {
v3d->prog.cs = cs;
v3d->dirty |= VC5_DIRTY_COMPILED_CS;
@@ -648,6 +658,30 @@ v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
v3d_update_compiled_vs(v3d, prim_mode);
}
+void
+v3d_update_compiled_cs(struct v3d_context *v3d)
+{
+ struct v3d_key local_key;
+ struct v3d_key *key = &local_key;
+
+ if (!(v3d->dirty & (~0 | /* XXX */
+ VC5_DIRTY_VERTTEX |
+ VC5_DIRTY_UNCOMPILED_FS))) {
+ return;
+ }
+
+ memset(key, 0, sizeof(*key));
+ v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]);
+ key->shader_state = v3d->prog.bind_compute;
+
+ struct v3d_compiled_shader *cs =
+ v3d_get_compiled_shader(v3d, key, sizeof(*key));
+ if (cs != v3d->prog.compute) {
+ v3d->prog.compute = cs;
+ v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */
+ }
+}
+
static uint32_t
fs_cache_hash(const void *key)
{
@@ -660,6 +694,12 @@ vs_cache_hash(const void *key)
return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
}
+static uint32_t
+cs_cache_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct v3d_key));
+}
+
static bool
fs_cache_compare(const void *key1, const void *key2)
{
@@ -672,23 +712,10 @@ vs_cache_compare(const void *key1, const void *key2)
return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
}
-static void
-delete_from_cache_if_matches(struct hash_table *ht,
- struct v3d_compiled_shader **last_compile,
- struct hash_entry *entry,
- struct v3d_uncompiled_shader *so)
+static bool
+cs_cache_compare(const void *key1, const void *key2)
{
- const struct v3d_key *key = entry->key;
-
- if (key->shader_state == so) {
- struct v3d_compiled_shader *shader = entry->data;
- _mesa_hash_table_remove(ht, entry);
-
- if (shader == *last_compile)
- *last_compile = NULL;
-
- v3d_free_compiled_shader(shader);
- }
+ return memcmp(key1, key2, sizeof(struct v3d_key)) == 0;
}
static void
@@ -696,14 +723,26 @@ v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct v3d_context *v3d = v3d_context(pctx);
struct v3d_uncompiled_shader *so = hwcso;
+ nir_shader *s = so->base.ir.nir;
- hash_table_foreach(v3d->fs_cache, entry) {
- delete_from_cache_if_matches(v3d->fs_cache, &v3d->prog.fs,
- entry, so);
- }
- hash_table_foreach(v3d->vs_cache, entry) {
- delete_from_cache_if_matches(v3d->vs_cache, &v3d->prog.vs,
- entry, so);
+ hash_table_foreach(v3d->prog.cache[s->info.stage], entry) {
+ const struct v3d_key *key = entry->key;
+ struct v3d_compiled_shader *shader = entry->data;
+
+ if (key->shader_state != so)
+ continue;
+
+ if (v3d->prog.fs == shader)
+ v3d->prog.fs = NULL;
+ if (v3d->prog.vs == shader)
+ v3d->prog.vs = NULL;
+ if (v3d->prog.cs == shader)
+ v3d->prog.cs = NULL;
+ if (v3d->prog.compute == shader)
+ v3d->prog.compute = NULL;
+
+ _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry);
+ v3d_free_compiled_shader(shader);
}
ralloc_free(so->base.ir.nir);
@@ -726,6 +765,22 @@ v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso)
v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS;
}
+static void
+v3d_compute_state_bind(struct pipe_context *pctx, void *state)
+{
+ struct v3d_context *v3d = v3d_context(pctx);
+
+ v3d->prog.bind_compute = state;
+}
+
+static void *
+v3d_create_compute_state(struct pipe_context *pctx,
+ const struct pipe_compute_state *cso)
+{
+ return v3d_uncompiled_shader_create(pctx, cso->ir_type,
+ (void *)cso->prog);
+}
+
void
v3d_program_init(struct pipe_context *pctx)
{
@@ -740,10 +795,18 @@ v3d_program_init(struct pipe_context *pctx)
pctx->bind_fs_state = v3d_fp_state_bind;
pctx->bind_vs_state = v3d_vp_state_bind;
- v3d->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
- fs_cache_compare);
- v3d->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
- vs_cache_compare);
+ if (v3d->screen->has_csd) {
+ pctx->create_compute_state = v3d_create_compute_state;
+ pctx->delete_compute_state = v3d_shader_state_delete;
+ pctx->bind_compute_state = v3d_compute_state_bind;
+ }
+
+ v3d->prog.cache[MESA_SHADER_VERTEX] =
+ _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_FRAGMENT] =
+ _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_COMPUTE] =
+ _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare);
}
void
@@ -751,16 +814,16 @@ v3d_program_fini(struct pipe_context *pctx)
{
struct v3d_context *v3d = v3d_context(pctx);
- hash_table_foreach(v3d->fs_cache, entry) {
- struct v3d_compiled_shader *shader = entry->data;
- v3d_free_compiled_shader(shader);
- _mesa_hash_table_remove(v3d->fs_cache, entry);
- }
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct hash_table *cache = v3d->prog.cache[i];
+ if (!cache)
+ continue;
- hash_table_foreach(v3d->vs_cache, entry) {
- struct v3d_compiled_shader *shader = entry->data;
- v3d_free_compiled_shader(shader);
- _mesa_hash_table_remove(v3d->vs_cache, entry);
+ hash_table_foreach(cache, entry) {
+ struct v3d_compiled_shader *shader = entry->data;
+ v3d_free_compiled_shader(shader);
+ _mesa_hash_table_remove(cache, entry);
+ }
}
v3d_bo_unreference(&v3d->prog.spill_bo);
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index afac781725a..b77e3d9060e 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -22,6 +22,8 @@
* IN THE SOFTWARE.
*/
+#include <sys/sysinfo.h>
+
#include "util/os_misc.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -122,7 +124,6 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
@@ -143,6 +144,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 0;
+ case PIPE_CAP_COMPUTE:
+ return screen->has_csd && screen->devinfo.ver >= 41;
+
case PIPE_CAP_GENERATE_MIPMAP:
return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU);
@@ -260,8 +264,15 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
{
struct v3d_screen *screen = v3d_screen(pscreen);
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_FRAGMENT) {
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ case PIPE_SHADER_COMPUTE:
+ if (!screen->has_csd)
+ return 0;
+ break;
+ default:
return 0;
}
@@ -335,7 +346,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return 0;
+ return 1 << PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
@@ -348,6 +359,86 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 0;
}
+static int
+v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param, void *ret)
+{
+ struct v3d_screen *screen = v3d_screen(pscreen);
+
+ if (!screen->has_csd)
+ return 0;
+
+#define RET(x) do { \
+ if (ret) \
+ memcpy(ret, x, sizeof(x)); \
+ return sizeof(x); \
+ } while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+ RET((uint32_t []) { 32 });
+ break;
+
+ case PIPE_COMPUTE_CAP_IR_TARGET:
+ sprintf(ret, "v3d");
+ return strlen(ret);
+
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 3 });
+
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ /* GL_MAX_COMPUTE_SHADER_WORK_GROUP_COUNT: The CSD has a
+ * 16-bit field for the number of workgroups in each
+ * dimension.
+ */
+ RET(((uint64_t []) { 65535, 65535, 65535 }));
+
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ /* GL_MAX_COMPUTE_WORK_GROUP_SIZE */
+ RET(((uint64_t []) { 256, 256, 256 }));
+
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ /* GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS: This is
+ * limited by WG_SIZE in the CSD.
+ */
+ RET((uint64_t []) { 256 });
+
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ RET((uint64_t []) { 1024 * 1024 * 1024 });
+
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ /* GL_MAX_COMPUTE_SHARED_MEMORY_SIZE */
+ RET((uint64_t []) { 32768 });
+
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+ RET((uint64_t []) { 4096 });
+
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
+ struct sysinfo si;
+ sysinfo(&si);
+ RET((uint64_t []) { si.totalram });
+ }
+
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ /* OpenCL only */
+ RET((uint32_t []) { 0 });
+
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { 1 });
+
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 1 });
+
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 16 });
+
+ }
+
+ return 0;
+}
+
static boolean
v3d_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -565,6 +656,7 @@ v3d_screen_create(int fd, struct renderonly *ro)
pscreen->get_param = v3d_screen_get_param;
pscreen->get_paramf = v3d_screen_get_paramf;
pscreen->get_shader_param = v3d_screen_get_shader_param;
+ pscreen->get_compute_param = v3d_get_compute_param;
pscreen->context_create = v3d_context_create;
pscreen->is_format_supported = v3d_screen_is_format_supported;
@@ -590,6 +682,8 @@ v3d_screen_create(int fd, struct renderonly *ro)
slab_create_parent(&screen->transfer_pool, sizeof(struct v3d_transfer), 16);
+ screen->has_csd = false; /* until the UABI is enabled. */
+
v3d_fence_init(screen);
v3d_process_debug_variable();
diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h
index 94ae8b30f3a..6e90755e77c 100644
--- a/src/gallium/drivers/v3d/v3d_screen.h
+++ b/src/gallium/drivers/v3d/v3d_screen.h
@@ -77,6 +77,8 @@ struct v3d_screen {
uint32_t bo_size;
uint32_t bo_count;
+ bool has_csd;
+
struct v3d_simulator_file *sim_file;
};
diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c
index a5532bdf2b4..77101947e2b 100644
--- a/src/gallium/drivers/v3d/v3d_uniforms.c
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@@ -358,6 +358,16 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
v3d->prog.spill_size_per_thread);
break;
+ case QUNIFORM_NUM_WORK_GROUPS:
+ cl_aligned_u32(&uniforms,
+ v3d->compute_num_workgroups[data]);
+ break;
+
+ case QUNIFORM_SHARED_OFFSET:
+ cl_aligned_reloc(&job->indirect, &uniforms,
+ v3d->compute_shared_memory, 0);
+ break;
+
default:
assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
@@ -444,6 +454,11 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
dirty |= VC5_DIRTY_ZSA;
break;
+ case QUNIFORM_NUM_WORK_GROUPS:
+ case QUNIFORM_SHARED_OFFSET:
+ /* Compute always recalculates uniforms. */
+ break;
+
default:
assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c
index 14e85e78485..14e95c71204 100644
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@@ -489,7 +489,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* Before setting up the draw, flush anything writing to the textures
* that we read from.
*/
- for (int s = 0; s < PIPE_SHADER_TYPES; s++)
+ for (int s = 0; s < PIPE_SHADER_COMPUTE; s++)
v3d_predraw_check_stage_inputs(pctx, s);
if (info->indirect)
@@ -514,7 +514,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* Mark SSBOs as being written. We don't actually know which ones are
* read vs written, so just assume the worst
*/
- for (int s = 0; s < PIPE_SHADER_TYPES; s++) {
+ for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) {
foreach_bit(i, v3d->ssbo[s].enabled_mask) {
v3d_job_add_write_resource(job,
v3d->ssbo[s].sb[i].buffer);