diff options
Diffstat (limited to 'src/gallium/winsys')
-rw-r--r-- | src/gallium/winsys/r600/drm/evergreen_hw_context.c | 142 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600_hw_context.c | 397 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600_priv.h | 4 |
3 files changed, 314 insertions, 229 deletions
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index cf8ae5185b4..3cf41c1f9f9 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -43,31 +43,31 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, {R_008A14_PA_CL_ENHANCE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, - {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, - {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C20_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, }; static const struct r600_reg cayman_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, {R_008A14_PA_CL_ENHANCE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, - {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, }; static const struct r600_reg evergreen_ctl_const_list[] = { @@ -817,7 +817,7 @@ static const struct r600_reg cayman_context_reg_list[] = { }; /* SHADER RESOURCE R600/R700 */ -static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) +static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { {R_030000_RESOURCE0_WORD0, 0, 0, 0}, @@ -831,10 +831,7 @@ static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) }; unsigned nreg = Elements(r600_shader_resource); - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET); + return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, EVERGREEN_RESOURCE_OFFSET); } /* SHADER SAMPLER R600/R700 */ @@ -907,6 +904,10 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->radeon = radeon; LIST_INITHEAD(&ctx->query_list); + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + LIST_INITHEAD(&ctx->enable_list); + ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); if (!ctx->range) { r = -ENOMEM; @@ -960,24 +961,19 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1600; j < 160; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* FS RESOURCE */ - for (int j = 0, offset = 0x7C00; j < 16; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } + + ctx->num_ps_resources = 176; + ctx->num_vs_resources = 160; + ctx->num_fs_resources = 16; + r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 176, 0x20); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1600, 160, 0x20); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x7C00, 16, 0x20); + if (r) + goto out_err; /* PS loop const */ evergreen_loop_const_init(ctx, 0); @@ -1015,33 +1011,31 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->fenced_bo); - /* init dirty list */ - LIST_INITHEAD(&ctx->dirty); return 0; out_err: r600_context_fini(ctx); return r; } -void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x20 * rid; + struct r600_block *block = ctx->ps_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x1600 + 0x20 * rid; + struct r600_block *block = ctx->vs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x7C00 + 0x20 * rid; + struct r600_block *block = ctx->fs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } static inline void evergreen_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -1056,6 +1050,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -1066,8 +1061,8 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context block->reg[i] = state->regs[i].value; } } - - r600_context_dirty_block(ctx, block, dirty, 2); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void evergreen_context_ps_partial_flush(struct r600_context *ctx) @@ -1094,6 +1089,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } if (state->nregs <= 3) { @@ -1119,7 +1115,8 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c if (dirty & R600_BLOCK_STATUS_DIRTY) evergreen_context_ps_partial_flush(ctx); - r600_context_dirty_block(ctx, block, dirty, 4); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 4); } void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1146,6 +1143,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr unsigned ndwords = 7; struct r600_block *dirty_block = NULL; struct r600_block *next_block; + uint32_t *pm4; if (draw->indices) { ndwords = 11; @@ -1187,24 +1185,26 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + pm4 = &ctx->pm4[ctx->pm4_cdwords]; + pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); + pm4[1] = draw->vgt_index_type; + pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); + pm4[3] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[6] = 0; + pm4[7] = draw->vgt_num_indices; + pm4[8] = draw->vgt_draw_initiator; + pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); + pm4[10] = 0; + r600_context_bo_reloc(ctx, &pm4[10], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); + pm4[5] = draw->vgt_num_indices; + pm4[6] = draw->vgt_draw_initiator; } + ctx->pm4_cdwords += ndwords; ctx->flags |= (R600_CONTEXT_DRAW_PENDING | R600_CONTEXT_DST_CACHES_DIRTY); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index a3c8945a722..cb244f2b9ee 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -79,6 +79,66 @@ static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsig } } +static void r600_init_block(struct r600_context *ctx, + struct r600_block *block, + const struct r600_reg *reg, int index, int nreg, + unsigned opcode, unsigned offset_base) +{ + int i = index; + int j, n = nreg; + + /* initialize block */ + block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ + block->start_offset = reg[i].offset; + block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); + block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; + block->reg = &block->pm4[block->pm4_ndwords]; + block->pm4_ndwords += n; + block->nreg = n; + block->nreg_dirty = n; + block->flags = 0; + LIST_INITHEAD(&block->list); + LIST_INITHEAD(&block->enable_list); + + for (j = 0; j < n; j++) { + if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { + block->flags |= REG_FLAG_DIRTY_ALWAYS; + } + if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } + + if (reg[i+j].flags & REG_FLAG_NEED_BO) { + block->nbo++; + assert(block->nbo < R600_BLOCK_MAX_BO); + block->pm4_bo_index[j] = block->nbo; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->reloc[block->nbo].flush_flags = 0; + block->reloc[block->nbo].flush_mask = 0; + } else { + block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; + block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; + } + block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; + } + if ((ctx->radeon->family > CHIP_R600) && + (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); + block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; + } + } + for (j = 0; j < n; j++) { + if (reg[i+j].flush_flags) { + block->pm4_flush_ndwords += 7; + } + } + /* check that we stay in limit */ + assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); +} + int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode, unsigned offset_base) { @@ -87,8 +147,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, int offset; for (unsigned i = 0, n = 0; i < nreg; i += n) { - u32 j; - /* ignore new block balise */ if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { n = 1; @@ -131,50 +189,8 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; } - /* initialize block */ - block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ - block->start_offset = reg[i].offset; - block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); - block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; - block->reg = &block->pm4[block->pm4_ndwords]; - block->pm4_ndwords += n; - block->nreg = n; - block->nreg_dirty = n; - block->flags = 0; - LIST_INITHEAD(&block->list); - - for (j = 0; j < n; j++) { - if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { - block->flags |= REG_FLAG_DIRTY_ALWAYS; - } - if (reg[i+j].flags & REG_FLAG_NEED_BO) { - block->nbo++; - assert(block->nbo < R600_BLOCK_MAX_BO); - block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); - block->pm4[block->pm4_ndwords++] = 0x00000000; - if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->reloc[block->nbo].flush_flags = 0; - block->reloc[block->nbo].flush_mask = 0; - } else { - block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; - block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; - } - block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; - } - if ((ctx->radeon->family > CHIP_R600) && - (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); - block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; - } - } - for (j = 0; j < n; j++) { - if (reg[i+j].flush_flags) { - block->pm4_flush_ndwords += 7; - } - } - /* check that we stay in limit */ - assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); + r600_init_block(ctx, block, reg, i, n, opcode, offset_base); + } return 0; } @@ -552,7 +568,31 @@ static const struct r600_reg r600_context_reg_list[] = { }; /* SHADER RESOURCE R600/R700 */ -static int r600_state_resource_init(struct r600_context *ctx, u32 offset) +int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) +{ + int i; + struct r600_block *block; + range->blocks = calloc(nblocks, sizeof(struct r600_block *)); + if (range->blocks == NULL) + return -ENOMEM; + + reg[0].offset += offset; + for (i = 0; i < nblocks; i++) { + block = calloc(1, sizeof(struct r600_block)); + if (block == NULL) { + return -ENOMEM; + } + ctx->nblocks++; + range->blocks[i] = block; + r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); + + reg[0].offset += stride; + } + return 0; +} + + +static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { {R_038000_RESOURCE0_WORD0, 0, 0, 0}, @@ -565,10 +605,7 @@ static int r600_state_resource_init(struct r600_context *ctx, u32 offset) }; unsigned nreg = Elements(r600_shader_resource); - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET); + return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); } /* SHADER SAMPLER R600/R700 */ @@ -630,6 +667,22 @@ static void r600_context_clear_fenced_bo(struct r600_context *ctx) } } +static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) +{ + struct r600_block *block; + int i; + for (i = 0; i < nblocks; i++) { + block = range->blocks[i]; + if (block) { + for (int k = 1; k <= block->nbo; k++) + r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + free(block); + } + } + free(range->blocks); + +} + /* initialize */ void r600_context_fini(struct r600_context *ctx) { @@ -654,6 +707,9 @@ void r600_context_fini(struct r600_context *ctx) } free(ctx->range[i].blocks); } + r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); + r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); + r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); free(ctx->reloc); @@ -664,13 +720,26 @@ void r600_context_fini(struct r600_context *ctx) memset(ctx, 0, sizeof(struct r600_context)); } +static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index) +{ + int c = *index; + for (int j = 0; j < num_blocks; j++) { + if (!range->blocks[j]) + continue; + + ctx->blocks[c++] = range->blocks[j]; + } + *index = c; +} + int r600_setup_block_table(struct r600_context *ctx) { /* setup block table */ + int c = 0; ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); if (!ctx->blocks) return -ENOMEM; - for (int i = 0, c = 0; i < NUM_RANGES; i++) { + for (int i = 0; i < NUM_RANGES; i++) { if (!ctx->range[i].blocks) continue; for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { @@ -691,6 +760,10 @@ int r600_setup_block_table(struct r600_context *ctx) } } } + + r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); + r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); + r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); return 0; } @@ -702,6 +775,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->radeon = radeon; LIST_INITHEAD(&ctx->query_list); + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + LIST_INITHEAD(&ctx->enable_list); + ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); if (!ctx->range) { r = -ENOMEM; @@ -747,24 +824,19 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* FS RESOURCE */ - for (int j = 0, offset = 0x2300; j < 16; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } + + ctx->num_ps_resources = 160; + ctx->num_vs_resources = 160; + ctx->num_fs_resources = 16; + r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); + if (r) + goto out_err; /* PS loop const */ r600_loop_const_init(ctx, 0); @@ -800,9 +872,6 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->fenced_bo); - /* init dirty list */ - LIST_INITHEAD(&ctx->dirty); - ctx->max_db = 4; return 0; @@ -920,20 +989,24 @@ void r600_context_reg(struct r600_context *ctx, dirty |= R600_BLOCK_STATUS_DIRTY; block->reg[id] = new_val; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } -void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, +void r600_context_dirty_block(struct r600_context *ctx, + struct r600_block *block, int dirty, int index) { - if (dirty && (index + 1) > block->nreg_dirty) + if ((index + 1) > block->nreg_dirty) block->nreg_dirty = index + 1; if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { - - block->status |= R600_BLOCK_STATUS_ENABLED; block->status |= R600_BLOCK_STATUS_DIRTY; ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } LIST_ADDTAIL(&block->list,&ctx->dirty); } } @@ -970,20 +1043,18 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat dirty |= R600_BLOCK_STATUS_DIRTY; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } } -void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) { - struct r600_range *range; - struct r600_block *block; int i; int dirty; int num_regs = ctx->radeon->chip_class >= EVERGREEN ? 8 : 7; + boolean is_vertex; - range = &ctx->range[CTX_RANGE_ID(offset)]; - block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); if (block->reloc[1].bo) @@ -992,15 +1063,17 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } + is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); dirty = block->status & R600_BLOCK_STATUS_DIRTY; for (i = 0; i < num_regs; i++) { - if (block->reg[i] != state->regs[i].value) { + if (dirty || (block->reg[i] != state->val[i])) { dirty |= R600_BLOCK_STATUS_DIRTY; - block->reg[i] = state->regs[i].value; + block->reg[i] = state->val[i]; } } @@ -1009,64 +1082,65 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ dirty |= R600_BLOCK_STATUS_DIRTY; if (!dirty) { - if (state->regs[0].bo) { - if ((block->reloc[1].bo->bo->handle != state->regs[0].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[0].bo->bo->handle)) + if (is_vertex) { + if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || + (block->reloc[2].bo->bo->handle != state->bo[0]->bo->handle)) dirty |= R600_BLOCK_STATUS_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->regs[2].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[3].bo->bo->handle)) + if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || + (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle)) dirty |= R600_BLOCK_STATUS_DIRTY; } } if (!dirty) { - if (state->regs[0].bo) - state->regs[0].bo->fence = ctx->radeon->fence; + if (is_vertex) + state->bo[0]->fence = ctx->radeon->fence; else { - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; + state->bo[0]->fence = ctx->radeon->fence; + state->bo[1]->fence = ctx->radeon->fence; } } else { r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - if (state->regs[0].bo) { + if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); - state->regs[0].bo->fence = ctx->radeon->fence; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[0]); + state->bo[0]->fence = ctx->radeon->fence; } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; - state->regs[2].bo->bo->binding |= BO_BOUND_TEXTURE; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); + state->bo[0]->fence = ctx->radeon->fence; + state->bo[1]->fence = ctx->radeon->fence; + state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; } } - r600_context_dirty_block(ctx, block, dirty, num_regs - 1); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, num_regs - 1); } -void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; + struct r600_block *block = ctx->ps_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; + struct r600_block *block = ctx->vs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x2300 + 0x1C * rid; + struct r600_block *block = ctx->fs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -1081,6 +1155,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -1091,7 +1166,8 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, } } - r600_context_dirty_block(ctx, block, dirty, 2); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void r600_context_ps_partial_flush(struct r600_context *ctx) @@ -1117,6 +1193,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } if (state->nregs <= 3) { @@ -1135,8 +1212,8 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex * will end up using the new border color. */ if (dirty & R600_BLOCK_STATUS_DIRTY) r600_context_ps_partial_flush(ctx); - - r600_context_dirty_block(ctx, block, dirty, 3); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 3); } void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1179,31 +1256,33 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * { int id; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); - int cp_dwords = block->pm4_ndwords, start_dword; - int new_dwords; + int cp_dwords = block->pm4_ndwords, start_dword = 0; + int new_dwords = 0; if (block->nreg_dirty == 0 && optional) { goto out; } - optional &= (block->nreg_dirty != block->nreg); - - ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; - for (int j = 0; j < block->nreg; j++) { - if (block->pm4_bo_index[j]) { - /* find relocation */ - id = block->pm4_bo_index[j]; - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); - r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + if (block->nbo) { + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; + + for (int j = 0; j < block->nreg; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + id = block->pm4_bo_index[j]; + r600_context_bo_reloc(ctx, + &block->pm4[block->reloc[id].bo_pm4_index], + block->reloc[id].bo); + r600_context_bo_flush(ctx, + block->reloc[id].flush_flags, + block->reloc[id].flush_mask, + block->reloc[id].bo); + } } + ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; } - ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; + optional &= (block->nreg_dirty != block->nreg); if (optional) { new_dwords = block->nreg_dirty; start_dword = ctx->pm4_cdwords; @@ -1268,6 +1347,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) unsigned ndwords = 7; struct r600_block *dirty_block = NULL; struct r600_block *next_block; + uint32_t *pm4; if (draw->indices) { ndwords = 11; @@ -1310,24 +1390,27 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + pm4 = &ctx->pm4[ctx->pm4_cdwords]; + + pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); + pm4[1] = draw->vgt_index_type; + pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); + pm4[3] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[6] = 0; + pm4[7] = draw->vgt_num_indices; + pm4[8] = draw->vgt_draw_initiator; + pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); + pm4[10] = 0; + r600_context_bo_reloc(ctx, &pm4[10], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); + pm4[5] = draw->vgt_num_indices; + pm4[6] = draw->vgt_draw_initiator; } + ctx->pm4_cdwords += ndwords; ctx->flags |= (R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING); @@ -1342,6 +1425,7 @@ void r600_context_flush(struct r600_context *ctx) uint64_t chunk_array[2]; unsigned fence; int r; + struct r600_block *enable_block = NULL, *next_block; if (!ctx->pm4_cdwords) return; @@ -1415,15 +1499,14 @@ void r600_context_flush(struct r600_context *ctx) /* set all valid group as dirty so they get reemited on * next draw command */ - for (int i = 0; i < ctx->nblocks; i++) { - if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) { - if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) { - LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty); - } - ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; - ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; - ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg; + LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { + if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { + LIST_ADDTAIL(&enable_block->list,&ctx->dirty); } + ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + + enable_block->pm4_flush_ndwords; + enable_block->status |= R600_BLOCK_STATUS_DIRTY; + enable_block->nreg_dirty = enable_block->nreg; } } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 9be5c358f85..d9cb52409cd 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -66,6 +66,7 @@ struct radeon { #define REG_FLAG_DIRTY_ALWAYS 2 #define REG_FLAG_RV6XX_SBU 4 #define REG_FLAG_NOT_R600 8 +#define REG_FLAG_ENABLE_ALWAYS 16 struct r600_reg { unsigned offset; @@ -158,7 +159,7 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode, unsigned offset_base); -void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset); +void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block); void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block); void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, int dirty, int index); @@ -167,6 +168,7 @@ void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask); void r600_init_cs(struct r600_context *ctx); +int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); /* * r600_bo.c */ |