diff options
author | Rob Clark <[email protected]> | 2019-04-25 12:25:02 -0700 |
---|---|---|
committer | Rob Clark <[email protected]> | 2019-04-25 14:13:31 -0700 |
commit | ee2e3a07bb1d58d761bf7250e88822b6955d13bf (patch) | |
tree | 5b6634b9dc25befc6dbb1136c937186515dee3c1 /src | |
parent | 85949c52b493435efe22d40a12172c39b63f8d28 (diff) |
freedreno/ir3: sample-shading support
Add the compiler support for the following extensions:
OES_sample_shading
OES_sample_variables
OES_shader_multisample_interpolation
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/freedreno/ir3/ir3.h | 3 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 97 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_context.h | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.c | 13 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.h | 6 |
5 files changed, 113 insertions, 8 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 5b3544c3542..f3c25ea2792 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1311,6 +1311,9 @@ INSTR1(SQRT) /* cat5 instructions: */ INSTR1(DSX) INSTR1(DSY) +INSTR1F(3D, DSX) +INSTR1F(3D, DSY) +INSTR1(RGETPOS) static inline struct ir3_instruction * ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 73e7dd6b973..34fb7b0969d 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1127,6 +1127,55 @@ static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot, add_sysval_input_compmask(ctx, slot, 0x1, instr); } +static struct ir3_instruction * +get_barycentric_centroid(struct ir3_context *ctx) +{ + if (!ctx->ij_centroid) { + struct ir3_instruction *xy[2]; + struct ir3_instruction *ij; + + ij = create_input_compmask(ctx, 0, 0x3); + ir3_split_dest(ctx->block, xy, ij, 0, 2); + + ctx->ij_centroid = ir3_create_collect(ctx, xy, 2); + + add_sysval_input_compmask(ctx, + SYSTEM_VALUE_BARYCENTRIC_CENTROID, + 0x3, ij); + } + + return ctx->ij_centroid; +} + +static struct ir3_instruction * +get_barycentric_sample(struct ir3_context *ctx) +{ + if (!ctx->ij_sample) { + struct ir3_instruction *xy[2]; + struct ir3_instruction *ij; + + ij = create_input_compmask(ctx, 0, 0x3); + ir3_split_dest(ctx->block, xy, ij, 0, 2); + + ctx->ij_sample = ir3_create_collect(ctx, xy, 2); + + add_sysval_input_compmask(ctx, + SYSTEM_VALUE_BARYCENTRIC_SAMPLE, + 0x3, ij); + } + + return ctx->ij_sample; +} + +static struct ir3_instruction * +get_barycentric_pixel(struct ir3_context *ctx) +{ + /* TODO when tgsi_to_nir supports "new-style" FS inputs switch + * this to create ij_pixel only on demand: + */ + return ctx->ij_pixel; +} + static void emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) { @@ -1168,13 +1217,40 @@ emit_intrinsic(struct ir3_context 
*ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_ubo: emit_intrinsic_load_ubo(ctx, intr, dst); break; + case nir_intrinsic_load_sample_pos_from_id: { + /* NOTE: blob seems to always use TYPE_F16 and then cov.f16f32, + * but that doesn't seem necessary. + */ + struct ir3_instruction *offset = + ir3_RGETPOS(b, ir3_get_src(ctx, &intr->src[0])[0], 0); + offset->regs[0]->wrmask = 0x3; + offset->cat5.type = TYPE_F32; + + ir3_split_dest(b, dst, offset, 0, 2); + + break; + } + case nir_intrinsic_load_size_ir3: + if (!ctx->ij_size) { + ctx->ij_size = create_input(ctx, 0); + + add_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SIZE, + ctx->ij_size); + } + dst[0] = ctx->ij_size; + break; case nir_intrinsic_load_barycentric_centroid: + ir3_split_dest(b, dst, get_barycentric_centroid(ctx), 0, 2); + break; + case nir_intrinsic_load_barycentric_sample: + if (ctx->so->key.msaa) { + ir3_split_dest(b, dst, get_barycentric_sample(ctx), 0, 2); + } else { + ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2); + } + break; case nir_intrinsic_load_barycentric_pixel: - /* NOTE: we still pre-create ij_pixel just to keep things working with - * nir producers that create "old style" frag shader inputs (ie. 
just - * load_input, vs load_barycentric_* + load_interpolated_input) - */ - ir3_split_dest(b, dst, ctx->ij_pixel, 0, 2); + ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2); break; case nir_intrinsic_load_interpolated_input: idx = nir_intrinsic_base(intr); @@ -1345,6 +1421,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0] = ctx->instance_id; break; case nir_intrinsic_load_sample_id: + ctx->so->per_samp = true; + /* fall-thru */ case nir_intrinsic_load_sample_id_no_per_sample: if (!ctx->samp_id) { ctx->samp_id = create_input(ctx, 0); @@ -2282,6 +2360,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in) so->inputs[n].interpolate = in->data.interpolation; if (ctx->so->type == MESA_SHADER_FRAGMENT) { + + /* if any varyings have 'sample' qualifer, that triggers us + * to run in per-sample mode: + */ + so->per_samp |= in->data.sample; + for (int i = 0; i < ncomp; i++) { struct ir3_instruction *instr = NULL; unsigned idx = (n * 4) + i + frac; @@ -2457,6 +2541,9 @@ setup_output(struct ir3_context *ctx, nir_variable *out) case FRAG_RESULT_COLOR: so->color0_mrt = 1; break; + case FRAG_RESULT_SAMPLE_MASK: + so->writes_smask = true; + break; default: if (slot >= FRAG_RESULT_DATA0) break; diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 8c1dc45e42b..c3e16ba37fa 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -65,7 +65,7 @@ struct ir3_context { * inputs. 
So we do all the input tracking normally and fix * things up after compile_instructions() */ - struct ir3_instruction *ij_pixel; + struct ir3_instruction *ij_pixel, *ij_sample, *ij_centroid, *ij_size; /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ struct ir3_instruction *frag_face, *frag_coord; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index fa4f432e606..3f8e8abdc08 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -262,8 +262,15 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir) NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, (nir_lower_io_options)0); - if (nir->info.stage == MESA_SHADER_FRAGMENT) + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + /* NOTE: lower load_barycentric_at_sample first, since it + * produces load_barycentric_at_offset: + */ + NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample); + NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset); + NIR_PASS_V(nir, ir3_nir_move_varying_inputs); + } NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); @@ -409,6 +416,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) case MESA_SHADER_FRAGMENT: dump_reg(out, "pos (ij_pixel)", ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL)); + dump_reg(out, "pos (ij_centroid)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID)); + dump_reg(out, "pos (ij_size)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE)); dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); if (so->color0_mrt) { dump_output(out, so, FRAG_RESULT_COLOR, "color"); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index b3481c12990..7f09ee5312f 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -154,6 +154,8 @@ struct ir3_shader_key { /* * Fragment shader variant parameters: */ + unsigned sample_shading : 1; + unsigned msaa : 1; unsigned 
color_two_side : 1; unsigned half_precision : 1; /* used when shader needs to handle flat varyings (a4xx) @@ -389,7 +391,7 @@ struct ir3_shader_variant { uint8_t slot; uint8_t regid; } outputs[16 + 2]; /* +POSITION +PSIZE */ - bool writes_pos, writes_psize; + bool writes_pos, writes_smask, writes_psize; /* attributes (VS) / varyings (FS): * Note that sysval's should come *after* normal inputs. @@ -439,6 +441,8 @@ struct ir3_shader_variant { /* do we have kill, image write, etc (which prevents early-z): */ bool no_earlyz; + bool per_samp; + /* Layout of constant registers, each section (in vec4). Pointer size * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the * UBO and stream-out consts. |