aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/freedreno/ir3/ir3_a4xx.c2
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c10
-rw-r--r--src/freedreno/ir3/ir3_context.c2
-rw-r--r--src/freedreno/ir3/ir3_cp.c2
-rw-r--r--src/freedreno/ir3/ir3_nir.c18
-rw-r--r--src/freedreno/ir3/ir3_nir.h2
-rw-r--r--src/freedreno/ir3/ir3_shader.c5
-rw-r--r--src/freedreno/ir3/ir3_shader.h2
-rw-r--r--src/freedreno/vulkan/tu_shader.c4
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_gallium.c16
10 files changed, 33 insertions, 30 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 5fe15cf8e27..30e452540bf 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -217,7 +217,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
*/
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 3eb34f44b14..a35a1518398 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -107,7 +107,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned n = const_state->offsets.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx->block, r);
@@ -684,7 +684,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
/* UBO addresses are the first driver params, but subtract 2 here to
* account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
* is the uniforms: */
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
@@ -753,7 +753,7 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
/* SSBO size stored as a const starting at ssbo_sizes: */
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
const_state->ssbo_size.off[blk_idx];
@@ -1009,7 +1009,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
* bytes-per-pixel should have been emitted in 2nd slot of
* image_dims. See ir3_shader::emit_image_dims().
*/
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];
struct ir3_instruction *aux = create_uniform(b, cb + 1);
@@ -2286,7 +2286,7 @@ emit_stream_out(struct ir3_context *ctx)
* stripped out in the backend.
*/
for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 5b95373b675..7cd87de0f29 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -101,8 +101,6 @@ ir3_context_init(struct ir3_compiler *compiler,
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
- ir3_setup_const_state(so);
-
return ctx;
}
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index 5d46b19d6e5..dedbd8dbb1d 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -298,7 +298,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
}
/* Reallocate for 4 more elements whenever it's necessary */
- struct ir3_const_state *const_state = &ctx->so->const_state;
+ struct ir3_const_state *const_state = &ctx->so->shader->const_state;
if (const_state->immediate_idx == const_state->immediates_size * 4) {
const_state->immediates_size += 4;
const_state->immediates = realloc (const_state->immediates,
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index edb5490d664..c692274d8e3 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -32,6 +32,8 @@
#include "ir3_compiler.h"
#include "ir3_shader.h"
+static void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir);
+
static const nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_scmp = true,
@@ -274,6 +276,14 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
nir_sweep(s);
+ /* The first time through, when not creating a variant (key is NULL),
+ * do the one-time const_state layout setup. This should be done after
+ * UBO range analysis.
+ */
+ if (!key) {
+ ir3_setup_const_state(shader, s);
+ }
+
return s;
}
@@ -330,13 +340,11 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
}
}
-void
-ir3_setup_const_state(struct ir3_shader_variant *v)
+static void
+ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir)
{
- struct ir3_shader *shader = v->shader;
struct ir3_compiler *compiler = shader->compiler;
- struct ir3_const_state *const_state = &v->const_state;
- nir_shader *nir = shader->nir;
+ struct ir3_const_state *const_state = &shader->const_state;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 6cfe27a56b2..84c09b073f0 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -50,6 +50,4 @@ bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader);
nir_ssa_def *
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
-void ir3_setup_const_state(struct ir3_shader_variant *v);
-
#endif /* IR3_NIR_H_ */
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 29f7fa05f0c..dacccc1329e 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -47,8 +47,6 @@ delete_variant(struct ir3_shader_variant *v)
ir3_destroy(v->ir);
if (v->bo)
fd_bo_del(v->bo);
- if (v->const_state.immediates)
- free(v->const_state.immediates);
free(v);
}
@@ -262,6 +260,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
v = v->next;
delete_variant(t);
}
+ free(shader->const_state.immediates);
ralloc_free(shader->nir);
free(shader);
}
@@ -350,7 +349,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
(regid >> 2), "xyzw"[regid & 0x3], i);
}
- struct ir3_const_state *const_state = &so->const_state;
+ struct ir3_const_state *const_state = &so->shader->const_state;
for (i = 0; i < const_state->immediates_count; i++) {
fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index a4386d7762d..c13cf1df0bb 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -393,7 +393,6 @@ struct ir3_shader_variant {
bool binning_pass;
struct ir3_shader_variant *binning;
- struct ir3_const_state const_state;
struct ir3_info info;
struct ir3 *ir;
@@ -539,6 +538,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
struct ir3_ubo_analysis_state ubo_state;
+ struct ir3_const_state const_state;
struct nir_shader *nir;
struct ir3_stream_output_info stream_output;
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index 62f8f91c7f3..8d6ccecdd9c 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -196,10 +196,10 @@ tu_shader_destroy(struct tu_device *dev,
for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
if (shader->variants[i].ir)
ir3_destroy(shader->variants[i].ir);
- if (shader->variants[i].const_state.immediates)
- free(shader->variants[i].const_state.immediates);
}
+ if (shader->ir3_shader.const_state.immediates)
+ free(shader->ir3_shader.const_state.immediates);
if (shader->binary)
free(shader->binary);
if (shader->binning_binary)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 08a7c90aab3..e605e531ecb 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -241,7 +241,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v,
* the user consts early to avoid HLSQ lockup caused by
* writing too many consts
*/
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen);
/* and even if the start of the const buffer is before
@@ -281,7 +281,7 @@ static void
emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.ubo;
if (v->constlen > offset) {
uint32_t params = const_state->num_ubos;
@@ -311,7 +311,7 @@ static void
emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
{
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.ssbo_sizes;
if (v->constlen > offset) {
uint32_t sizes[align(const_state->ssbo_size.count, 4)];
@@ -333,7 +333,7 @@ static void
emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
{
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.image_dims;
if (v->constlen > offset) {
uint32_t dims[align(const_state->image_dims.count, 4)];
@@ -386,7 +386,7 @@ static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t base = const_state->offsets.immediate;
int size = const_state->immediates_count;
@@ -412,7 +412,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
/* streamout addresses after driver-params: */
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.tfbo;
if (v->constlen > offset) {
struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -540,7 +540,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info) {
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
@@ -635,7 +635,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
/* emit compute-shader driver-params: */
- const struct ir3_const_state *const_state = &v->const_state;
+ const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
ring_wfi(ctx->batch, ring);