diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 15
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 18
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 96
3 files changed, 61 insertions, 68 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 40b0328ea20..da959dd537b 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -44,20 +44,19 @@ #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 4 +#define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers after user buffers */ #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) -#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) -#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) -#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3) -/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is - * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16. +#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) +/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit + * of 16 const buffers. * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. * - * Fixing this properly would require the driver to combine its buffers into a single hardware buffer, - * which would also allow supporting the d3d 11 mandated minimum of 15 user const buffers. + * In order to support d3d 11 mandated minimum of 15 user const buffers + * we'd have to squash all use cases into one driver buffer. 
*/ #define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 7a2609a2c1d..7bb98728a99 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5035,8 +5035,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) alu.op = ALU_OP1_MOV; if (ctx->bc->chip_class >= EVERGREEN) { - alu.src[0].sel = 512 + (id / 4); - alu.src[0].chan = id % 4; + /* channel 0 or 2 of each word */ + alu.src[0].sel = 512 + (id / 2); + alu.src[0].chan = (id % 2) * 2; } else { /* r600 we have them at channel 2 of the second dword */ alu.src[0].sel = 512 + (id * 2) + 1; @@ -5697,9 +5698,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; - alu.src[0].sel = 512 + (id / 4); - alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER; - alu.src[0].chan = id % 4; + if (ctx->bc->chip_class >= EVERGREEN) { + /* channel 1 or 3 of each word */ + alu.src[0].sel = 512 + (id / 2); + alu.src[0].chan = ((id % 2) * 2) + 1; + } else { + /* r600 we have them at channel 2 of the second dword */ + alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].chan = 2; + } + alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c3f21cb93ae..d35c9b363ef 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -984,6 +984,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask * then in the shader, we AND the 4 components with 0xffffffff or 0, * then OR the alpha with the value given here. * We use a 6th constant to store the txq buffer size in + * we use 7th slot for number of cube layers in a cube map array. 
*/ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type) { @@ -1022,6 +1023,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty samplers->buffer_constants[offset + 4] = 0; samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6; } } @@ -1033,7 +1035,10 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty pipe_resource_reference(&cb.buffer, NULL); } -/* On evergreen we only need to store the buffer size for TXQ */ +/* On evergreen we store two values + * 1. buffer size for TXQ + * 2. number of cube layers in a cube map array. + */ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type) { struct r600_textures_info *samplers = &rctx->samplers[shader_type]; @@ -1048,12 +1053,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type samplers->views.dirty_buffer_constants = FALSE; bits = util_last_bit(samplers->views.enabled_mask); - array_size = bits * sizeof(uint32_t) * 4; + array_size = bits * 2 * sizeof(uint32_t) * 4; samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); memset(samplers->buffer_constants, 0, array_size); - for (i = 0; i < bits; i++) - if (samplers->views.enabled_mask & (1 << i)) - samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + for (i = 0; i < bits; i++) { + if (samplers->views.enabled_mask & (1 << i)) { + uint32_t offset = i * 2; + samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6; + } + } 
cb.buffer = NULL; cb.user_buffer = samplers->buffer_constants; @@ -1063,35 +1072,6 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type pipe_resource_reference(&cb.buffer, NULL); } -static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type) -{ - struct r600_textures_info *samplers = &rctx->samplers[shader_type]; - int bits; - uint32_t array_size; - struct pipe_constant_buffer cb; - int i; - - if (!samplers->views.dirty_txq_constants) - return; - - samplers->views.dirty_txq_constants = FALSE; - - bits = util_last_bit(samplers->views.enabled_mask); - array_size = bits * sizeof(uint32_t) * 4; - samplers->txq_constants = realloc(samplers->txq_constants, array_size); - memset(samplers->txq_constants, 0, array_size); - for (i = 0; i < bits; i++) - if (samplers->views.enabled_mask & (1 << i)) - samplers->txq_constants[i] = samplers->views.views[i]->base.texture->array_size / 6; - - cb.buffer = NULL; - cb.user_buffer = samplers->txq_constants; - cb.buffer_offset = 0; - cb.buffer_size = array_size; - rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_TXQ_CONST_BUFFER, &cb); - pipe_resource_reference(&cb.buffer, NULL); -} - /* set sample xy locations as array of fragment shader constants */ void r600_set_sample_locations_constant_buffer(struct r600_context *rctx) { @@ -1175,7 +1155,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) struct pipe_context * ctx = (struct pipe_context*)rctx; bool ps_dirty = false, vs_dirty = false, gs_dirty = false; bool blend_disable; - + bool need_buf_const; if (!rctx->blitter->running) { unsigned i; @@ -1296,29 +1276,35 @@ static bool r600_update_derived_state(struct r600_context *rctx) /* on R600 we stuff masks + txq info into one constant buffer */ /* on evergreen we only need a txq info one */ - if (rctx->b.chip_class < EVERGREEN) { - if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, 
PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers) - r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); - } else { - if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers) - eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + if (rctx->ps_shader) { + need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || rctx->ps_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT); + } } + if (rctx->vs_shader) { + need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX); + } + } - if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT); - if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX); - if (rctx->gs_shader && rctx->gs_shader->current->shader.has_txq_cube_array_z_comp) - r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY); + if (rctx->gs_shader) { + need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || 
rctx->gs_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + if (rctx->b.chip_class < EVERGREEN) + r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + else + eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY); + } + } if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { if (!r600_adjust_gprs(rctx)) { |