From 31c7e6c51db2e5be9e3375b1d1fa0bfdf4c35236 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Nov 2014 12:53:40 +1000 Subject: r600g: merge the TXQ and BUFFER constant buffers (v1.1) We are using one more constant buffer than the hardware has. In the future the driver should probably end up using a single buffer in total, but this is a good first step: it merges the TXQ cube array and buffer info constants on r600 and evergreen. This should in theory fix geom shader tests on r600. v1.1: fix comments from Glenn. Reviewed-by: Glenn Kennard Cc: "10.4 10.3" (cherry picked from commit 07ae69753c6818bcce5d4edaf2cca39c20e37f4c) Squashed with commit r600g: fix fallout from last patch I accidentally rebased from the wrong machine and missed some fixes that were on my r600 box. Doh. This takes a bunch of geom shader textureSize tests on rv635 from causing a GPU reset to passing. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86760 Reported-by: wolput@onsneteindhoven.nl Cc: "10.4 10.3" Signed-off-by: Dave Airlie (cherry picked from commit b10ddf962f2ca09073a13ad19817bf7c9b158294) Squashed with commit r600g: make llvm code compile this time Actually compiling the code helps make it compile. 
Cc: "10.3 10.4" Signed-off-by: Dave Airlie (cherry picked from commit 91a827624c01d40613e97322632aaffe319540f1) --- src/gallium/drivers/r600/r600_llvm.c | 3 +- src/gallium/drivers/r600/r600_pipe.h | 16 ++--- src/gallium/drivers/r600/r600_shader.c | 18 ++++-- src/gallium/drivers/r600/r600_state_common.c | 97 ++++++++++++---------------- 4 files changed, 62 insertions(+), 72 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index c19693a03e6..e8cae000080 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -23,7 +23,6 @@ #define CONSTANT_BUFFER_0_ADDR_SPACE 8 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER) -#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + R600_TXQ_CONST_BUFFER) #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \ (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER) @@ -690,7 +689,7 @@ static void llvm_emit_tex( if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, - llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER), + llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 40b0328ea20..46b0a2de07d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -44,20 +44,19 @@ #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 4 +#define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers 
after user buffers */ #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) -#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) -#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) -#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3) -/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is - * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16. +#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) +/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit + * of 16 const buffers. * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. * - * Fixing this properly would require the driver to combine its buffers into a single hardware buffer, - * which would also allow supporting the d3d 11 mandated minimum of 15 user const buffers. + * In order to support d3d 11 mandated minimum of 15 user const buffers + * we'd have to squash all use cases into one driver buffer. 
*/ #define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) @@ -316,7 +315,6 @@ struct r600_samplerview_state { uint32_t dirty_mask; uint32_t compressed_depthtex_mask; /* which textures are depth */ uint32_t compressed_colortex_mask; - boolean dirty_txq_constants; boolean dirty_buffer_constants; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 55fc463f644..eb6486c79d4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5035,8 +5035,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) alu.op = ALU_OP1_MOV; if (ctx->bc->chip_class >= EVERGREEN) { - alu.src[0].sel = 512 + (id / 4); - alu.src[0].chan = id % 4; + /* channel 0 or 2 of each word */ + alu.src[0].sel = 512 + (id / 2); + alu.src[0].chan = (id % 2) * 2; } else { /* r600 we have them at channel 2 of the second dword */ alu.src[0].sel = 512 + (id * 2) + 1; @@ -5697,9 +5698,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; - alu.src[0].sel = 512 + (id / 4); - alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER; - alu.src[0].chan = id % 4; + if (ctx->bc->chip_class >= EVERGREEN) { + /* channel 1 or 3 of each word */ + alu.src[0].sel = 512 + (id / 2); + alu.src[0].chan = ((id % 2) * 2) + 1; + } else { + /* r600 we have them at channel 2 of the second dword */ + alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].chan = 2; + } + alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c3f21cb93ae..09d8952844f 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -649,7 +649,6 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, 
dst->views.dirty_mask |= new_mask; dst->views.compressed_depthtex_mask &= dst->views.enabled_mask; dst->views.compressed_colortex_mask &= dst->views.enabled_mask; - dst->views.dirty_txq_constants = TRUE; dst->views.dirty_buffer_constants = TRUE; r600_sampler_views_dirty(rctx, &dst->views); @@ -984,6 +983,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask * then in the shader, we AND the 4 components with 0xffffffff or 0, * then OR the alpha with the value given here. * We use a 6th constant to store the txq buffer size in + * we use 7th slot for number of cube layers in a cube map array. */ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type) { @@ -1022,6 +1022,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty samplers->buffer_constants[offset + 4] = 0; samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6; } } @@ -1033,7 +1034,10 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty pipe_resource_reference(&cb.buffer, NULL); } -/* On evergreen we only need to store the buffer size for TXQ */ +/* On evergreen we store two values + * 1. buffer size for TXQ + * 2. number of cube layers in a cube map array. 
+ */ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type) { struct r600_textures_info *samplers = &rctx->samplers[shader_type]; @@ -1048,12 +1052,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type samplers->views.dirty_buffer_constants = FALSE; bits = util_last_bit(samplers->views.enabled_mask); - array_size = bits * sizeof(uint32_t) * 4; + array_size = bits * 2 * sizeof(uint32_t) * 4; samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); memset(samplers->buffer_constants, 0, array_size); - for (i = 0; i < bits; i++) - if (samplers->views.enabled_mask & (1 << i)) - samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + for (i = 0; i < bits; i++) { + if (samplers->views.enabled_mask & (1 << i)) { + uint32_t offset = i * 2; + samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6; + } + } cb.buffer = NULL; cb.user_buffer = samplers->buffer_constants; @@ -1063,35 +1071,6 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type pipe_resource_reference(&cb.buffer, NULL); } -static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type) -{ - struct r600_textures_info *samplers = &rctx->samplers[shader_type]; - int bits; - uint32_t array_size; - struct pipe_constant_buffer cb; - int i; - - if (!samplers->views.dirty_txq_constants) - return; - - samplers->views.dirty_txq_constants = FALSE; - - bits = util_last_bit(samplers->views.enabled_mask); - array_size = bits * sizeof(uint32_t) * 4; - samplers->txq_constants = realloc(samplers->txq_constants, array_size); - memset(samplers->txq_constants, 0, array_size); - for (i = 0; i < bits; i++) - if 
(samplers->views.enabled_mask & (1 << i)) - samplers->txq_constants[i] = samplers->views.views[i]->base.texture->array_size / 6; - - cb.buffer = NULL; - cb.user_buffer = samplers->txq_constants; - cb.buffer_offset = 0; - cb.buffer_size = array_size; - rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_TXQ_CONST_BUFFER, &cb); - pipe_resource_reference(&cb.buffer, NULL); -} - /* set sample xy locations as array of fragment shader constants */ void r600_set_sample_locations_constant_buffer(struct r600_context *rctx) { @@ -1175,7 +1154,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) struct pipe_context * ctx = (struct pipe_context*)rctx; bool ps_dirty = false, vs_dirty = false, gs_dirty = false; bool blend_disable; - + bool need_buf_const; if (!rctx->blitter->running) { unsigned i; @@ -1296,29 +1275,35 @@ static bool r600_update_derived_state(struct r600_context *rctx) /* on R600 we stuff masks + txq info into one constant buffer */ /* on evergreen we only need a txq info one */ - if (rctx->b.chip_class < EVERGREEN) { - if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); - } else { - if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + if (rctx->ps_shader) { + need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || 
rctx->ps_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + } } + if (rctx->vs_shader) { + need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + } + } - if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY); + if (rctx->gs_shader) { + need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || rctx->gs_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + } + } if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { if (!r600_adjust_gprs(rctx)) { -- cgit v1.2.3