summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Michel Dänzer <[email protected]> 2012-08-22 18:15:36 +0200
committer: Michel Dänzer <[email protected]> 2012-08-27 11:51:56 +0200
commit: f402acdbe244e5de9b2b616e0a908f5d1416ce89 (patch)
tree: 433ca9645a319209eba7dab3122cc6f3aec53626 /src/gallium
parent: 26c7139d2c594edbe33cbbd5f786988a529389c8 (diff)
radeonsi: Use FP16 shader export format when necessary / possible.
Fixes piglit fbo-blending-formats.

Signed-off-by: Michel Dänzer <[email protected]>
Reviewed-by: Tom Stellard <[email protected]>
Reviewed-by: Christian König <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeon/SIInstructions.td | 4
-rw-r--r-- src/gallium/drivers/radeon/SIIntrinsics.td | 1
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 3
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_shader.c | 51
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c | 69
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c | 4
6 files changed, 114 insertions, 18 deletions
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index f09d6042457..304732178c6 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
index 6eadc94458f..b9544f10687 100644
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -14,6 +14,7 @@
let TargetPrefix = "SI", isTarget = 1 in {
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
/* XXX: We may need a seperate intrinsic here for loading integer values */
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 989bb49cbee..099b50916f6 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -134,7 +134,8 @@ struct r600_context {
unsigned saved_render_cond_mode;
/* shader information */
unsigned sprite_coord_enable;
- boolean export_16bpc;
+ unsigned export_16bpc;
+ unsigned spi_shader_col_format;
unsigned alpha_ref;
boolean alpha_ref_dirty;
struct r600_textures_info vs_samplers;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index fd614dde388..98866c4ee1f 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
unsigned compressed = 0;
unsigned chan;
- for (chan = 0; chan < 4; chan++ ) {
- LLVMValueRef out_ptr =
- si_shader_ctx->radeon_bld.soa.outputs[index][chan];
- /* +5 because the first output value will be
- * the 6th argument to the intrinsic. */
- args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
- out_ptr, "");
+ if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ int cbuf = target - V_008DFC_SQ_EXP_MRT;
+
+ if (cbuf >= 0 && cbuf < 8) {
+ struct r600_context *rctx = si_shader_ctx->rctx;
+ compressed = (rctx->export_16bpc >> cbuf) & 0x1;
+ }
+ }
+
+ if (compressed) {
+ /* Pixel shader needs to pack output values before export */
+ for (chan = 0; chan < 2; chan++ ) {
+ LLVMValueRef *out_ptr =
+ si_shader_ctx->radeon_bld.soa.outputs[index];
+ args[0] = LLVMBuildLoad(base->gallivm->builder,
+ out_ptr[2 * chan], "");
+ args[1] = LLVMBuildLoad(base->gallivm->builder,
+ out_ptr[2 * chan + 1], "");
+ args[chan + 5] =
+ build_intrinsic(base->gallivm->builder,
+ "llvm.SI.packf16",
+ LLVMInt32TypeInContext(base->gallivm->context),
+ args, 2,
+ LLVMReadNoneAttribute);
+ args[chan + 7] = args[chan + 5];
+ }
+
+ /* Set COMPR flag */
+ args[4] = uint->one;
+ } else {
+ for (chan = 0; chan < 4; chan++ ) {
+ LLVMValueRef out_ptr =
+ si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+ /* +5 because the first output value will be
+ * the 6th argument to the intrinsic. */
+ args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
+ out_ptr, "");
+ }
+
+ /* Clear COMPR flag */
+ args[4] = uint->zero;
}
/* XXX: This controls which components of the output
@@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
/* Specify the target we are exporting */
args[3] = lp_build_const_int32(base->gallivm, target);
- /* Set COMPR flag */
- args[4] = uint->zero;
-
/* XXX: We probably need to keep track of the output
* values, so we know what we are passing to the next
* stage. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5c2e7434ba3..fced24cc4ec 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
}
}
+/* Returns the size in bits of the widest component of a CB format */
+static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
+{
+ switch(colorformat) {
+ case V_028C70_COLOR_4_4_4_4:
+ return 4;
+
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_5_5_5_1:
+ return 5;
+
+ case V_028C70_COLOR_5_6_5:
+ return 6;
+
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ return 8;
+
+ case V_028C70_COLOR_10_10_10_2:
+ case V_028C70_COLOR_2_10_10_10:
+ return 10;
+
+ case V_028C70_COLOR_10_11_11:
+ case V_028C70_COLOR_11_11_10:
+ return 11;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ case V_028C70_COLOR_16_16_16_16:
+ return 16;
+
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ return 24;
+
+ case V_028C70_COLOR_32:
+ case V_028C70_COLOR_32_32:
+ case V_028C70_COLOR_32_32_32_32:
+ case V_028C70_COLOR_X24_8_32_FLOAT:
+ return 32;
+ }
+
+ assert(!"Unknown maximum component size");
+ return 0;
+}
+
static uint32_t si_translate_dbformat(enum pipe_format format)
{
switch (format) {
@@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
const struct util_format_description *desc;
int i;
unsigned blend_clamp = 0, blend_bypass = 0;
+ unsigned max_comp_size;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
@@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
}
si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
+
+ /* Determine pixel shader export format */
+ max_comp_size = si_colorformat_max_comp_size(format);
+ if (ntype == V_028C70_NUMBER_SRGB ||
+ ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
+ max_comp_size <= 10) ||
+ (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
+ rctx->export_16bpc |= 1 << cb;
+ rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * cb);
+ } else
+ rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * cb);
}
static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
@@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
/* build states */
rctx->have_depth_fb = 0;
+ rctx->export_16bpc = 0;
+ rctx->spi_shader_col_format = 0;
for (int i = 0; i < state->nr_cbufs; i++) {
si_cb(rctx, pm4, state, i);
}
+ assert(!(rctx->export_16bpc & ~0xff));
si_db(rctx, pm4, state);
shader_mask = 0;
@@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask);
+ si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
+ rctx->spi_shader_col_format);
si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
si_pm4_set_state(rctx, framebuffer, pm4);
@@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
if (sel->type == PIPE_SHADER_FRAGMENT) {
if (sel->fs_write_all)
key |= rctx->framebuffer.nr_cbufs;
+ key |= rctx->export_16bpc << 4;
/*if (rctx->queued.named.rasterizer)
- key |= rctx->queued.named.rasterizer->flatshade << 4;*/
- /*key |== rctx->two_side << 5;*/
+ key |= rctx->queued.named.rasterizer->flatshade << 12;*/
+ /*key |== rctx->two_side << 13;*/
}
return key;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 95821dc5f5c..5f8e2118a15 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
/* XXX: Depends on Z buffer format? */
si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);
- /* XXX: Depends on color buffer format? */
- si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
- S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR));
-
va = r600_resource_va(ctx->screen, (void *)shader->bo);
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);