diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_hw_context.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 49 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_resource.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_resource.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 14 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 209 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 2 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 34 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 4 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 23 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_winsys.h | 11 |
13 files changed, 370 insertions, 48 deletions
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index bd1d969eca3..e75eaf2b79f 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr struct r600_block *dirty_block = NULL; struct r600_block *next_block; uint32_t *pm4; + uint64_t va; if (draw->indices) { ndwords = 11; @@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); pm4[3] = draw->vgt_num_instances; if (draw->indices) { - pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset; - pm4[6] = 0; + va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices); + va += draw->indices_bo_offset; + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = va; + pm4[6] = (va >> 32UL) & 0xFF; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f3aab69dec5..df6ad28681e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1105,8 +1105,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) | S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = tmp->offset[0] >> 8; - rstate->val[3] = tmp->offset[1] >> 8; + rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype, endian; - unsigned offset; + uint64_t offset; unsigned tile_type; const struct util_format_description *desc; int i; @@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state } else /* workaround for linear buffers */ tile_type = 1; + offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture); + offset >>= 8; + /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL, 0); @@ -1475,7 +1478,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_surface *surf; - unsigned level, first_layer, pitch, slice, format, offset, array_mode; + unsigned level, first_layer, pitch, slice, format, array_mode; + uint64_t offset; if (state->zsbuf == NULL) return; @@ -1494,20 +1498,26 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(rtex->real_format); + offset += r600_resource_va(rctx->context.screen, surf->base.texture); + offset >>= 8; + r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); if (rtex->stencil) { - uint32_t stencil_offset = + uint64_t stencil_offset = r600_texture_get_offset(rtex->stencil, level, first_layer); + stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil); + stencil_offset >>= 8; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); + stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); + stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, 1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); } else { @@ -2383,7 +2393,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8, + 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | @@ -2457,7 +2468,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8, + 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -2474,7 +2486,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - 0, + r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8, 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } @@ -2521,15 +2533,20 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, } -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx, + struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride, enum radeon_bo_usage usage) { + uint64_t va; + + va = r600_resource_va(ctx->screen, (void *)rbuffer); rstate->bo[0] = rbuffer; rstate->bo_usage[0] = usage; - rstate->val[0] = offset; + rstate->val[0] = (offset + va) & 0xFFFFFFFFUL; rstate->val[1] = rbuffer->buf->size - offset - 1; rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride); + S_030008_STRIDE(stride) | + (((va + offset) >> 32UL) & 0xFF); } diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1dba96642aa..b0a28d98215 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -978,6 +978,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_resource *bo) { + uint64_t va = 0; + /* if bo has already been flushed */ if (!(~bo->cs_buf->last_flush & flush_flags)) { bo->cs_buf->last_flush &= flush_mask; @@ -1007,10 +1009,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; } } else { + va = r600_resource_va(&ctx->screen->screen, (void *)bo); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8; - ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; + ctx->pm4[ctx->pm4_cdwords++] = va >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); @@ -1590,14 +1593,20 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) { + uint64_t va; + r600_need_cs_space(ctx, 10, FALSE); + va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); + va = va + (offset << 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = offset << 2; /* ADDRESS_LO */ - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); /* DATA_SEL | INT_EN | ADDRESS_HI */ + ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ + /* DATA_SEL | INT_EN | ADDRESS_HI */ + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); @@ -1707,6 +1716,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned new_results_end, i; u32 *results; + uint64_t va; r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); @@ -1751,13 +1761,16 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) } /* emit begin query */ + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); + va += query->results_end; + switch (query->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end; - ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -1771,8 +1784,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) case PIPE_QUERY_TIME_ELAPSED: ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end; - ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; break; @@ -1787,14 +1800,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) void r600_query_end(struct r600_context *ctx, struct r600_query *query) { + uint64_t va; + + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); /* emit end query */ switch (query->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + va += query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; - ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -1806,10 +1823,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; break; case PIPE_QUERY_TIME_ELAPSED: + va += query->results_end + query->result_size/2; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2; - ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; break; @@ -1826,6 +1844,8 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait) { + uint64_t va; + if (operation == PREDICATION_OP_CLEAR) { r600_need_cs_space(ctx, 3, FALSE); @@ -1845,12 +1865,13 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); /* emit predicate packets for all data blocks */ while (results_base != query->results_end) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - ctx->pm4[ctx->pm4_cdwords++] = results_base; - ctx->pm4[ctx->pm4_cdwords++] = op; + ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL; + ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_READ); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 447b9dc13a4..bd782438354 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -243,7 +243,8 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx, + struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride, enum radeon_bo_usage usage); diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index f3ab3613c84..01db97ad42c 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -62,3 +62,14 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600) r600->context.transfer_destroy = u_transfer_destroy_vtbl; r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; } + +uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource *rresource = (struct r600_resource*)resource; + + if (rresource->buf) { + return rscreen->ws->buffer_get_virtual_address(rresource->buf); + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 1ca67298d05..f39ac55e877 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -102,4 +102,6 @@ struct r600_pipe_context; void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset); +uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource); + #endif diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 054ab90595c..034a560a7ec 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -337,6 +337,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = r600_resource(buffer); struct r600_pipe_resource_state *rstate; + uint64_t va_offset; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -347,6 +348,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); + va_offset = r600_resource_va(ctx->screen, (void*)rbuffer); + va_offset += offset; + va_offset >>= 8; switch (shader) { case PIPE_SHADER_VERTEX: @@ -357,7 +361,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); + va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; @@ -370,7 +374,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); @@ -385,7 +389,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); + va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; @@ -397,7 +401,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); @@ -522,7 +526,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index d4746ffc535..de2906faef9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -30,6 +30,7 @@ #include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_double_list.h" #include "os/os_thread.h" #include "os/os_mman.h" @@ -39,6 +40,11 @@ #include <xf86drm.h> #include <errno.h> +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 @@ -57,6 +63,33 @@ struct drm_radeon_gem_wait { #endif +#ifndef RADEON_VA_MAP + +#define RADEON_VA_MAP 1 +#define RADEON_VA_UNMAP 2 + +#define RADEON_VA_RESULT_OK 0 +#define RADEON_VA_RESULT_ERROR 1 +#define RADEON_VA_RESULT_VA_EXIST 2 + +#define RADEON_VM_PAGE_VALID (1 << 0) +#define RADEON_VM_PAGE_READABLE (1 << 1) +#define RADEON_VM_PAGE_WRITEABLE (1 << 2) +#define RADEON_VM_PAGE_SYSTEM (1 << 3) +#define RADEON_VM_PAGE_SNOOPED (1 << 4) + +struct drm_radeon_gem_va { + uint32_t handle; + uint32_t operation; + uint32_t vm_id; + uint32_t flags; + uint64_t offset; +}; + +#define DRM_RADEON_GEM_VA 0x2b +#endif + + extern const struct pb_vtbl radeon_bo_vtbl; @@ -67,6 +100,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo) return (struct radeon_bo *)bo; } +struct radeon_bo_va_hole { + struct list_head list; + uint64_t offset; + uint64_t size; +}; + struct radeon_bomgr { /* Base class. */ struct pb_manager base; @@ -77,6 +116,12 @@ struct radeon_bomgr { /* List of buffer handles and its mutex. */ struct util_hash_table *bo_handles; pipe_mutex bo_handles_mutex; + pipe_mutex bo_va_mutex; + + /* is virtual address supported */ + bool va; + unsigned va_offset; + struct list_head va_holes; }; static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) @@ -151,9 +196,94 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf, } } +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size) +{ + struct radeon_bo_va_hole *hole, *n; + uint64_t offset = 0; + + pipe_mutex_lock(mgr->bo_va_mutex); + /* first look for a hole */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { + if (hole->size == size) { + offset = hole->offset; + list_del(&hole->list); + FREE(hole); + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; + } + if (hole->size > size) { + offset = hole->offset; + hole->size -= size; + hole->offset += size; + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; + } + } + + offset = mgr->va_offset; + mgr->va_offset += size; + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; +} + +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size) +{ + pipe_mutex_lock(mgr->bo_va_mutex); + if (va >= mgr->va_offset) { + if (va > mgr->va_offset) { + struct radeon_bo_va_hole *hole; + hole = CALLOC_STRUCT(radeon_bo_va_hole); + if (hole) { + hole->size = va - mgr->va_offset; + hole->offset = mgr->va_offset; + list_add(&hole->list, &mgr->va_holes); + } + } + mgr->va_offset = va + size; + } else { + struct radeon_bo_va_hole *hole, *n; + uint64_t stmp, etmp; + + /* free all holes that fall into the range + * NOTE that we might lose virtual address space + */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { + stmp = hole->offset; + etmp = stmp + hole->size; + if (va >= stmp && va < etmp) { + list_del(&hole->list); + FREE(hole); + } + } + } + pipe_mutex_unlock(mgr->bo_va_mutex); +} + +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size) +{ + pipe_mutex_lock(mgr->bo_va_mutex); + if ((va + size) == mgr->va_offset) { + mgr->va_offset = va; + } else { + struct radeon_bo_va_hole *hole; + + /* FIXME on allocation failure we just lose virtual address space + * maybe print a warning + */ + hole = CALLOC_STRUCT(radeon_bo_va_hole); + if (hole) { + hole->size = size; + hole->offset = va; + list_add(&hole->list, &mgr->va_holes); + } + } + pipe_mutex_unlock(mgr->bo_va_mutex); +} + static void radeon_bo_destroy(struct pb_buffer *_buf) { struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_bomgr *mgr = bo->mgr; struct drm_gem_close args; memset(&args, 0, sizeof(args)); @@ -168,6 +298,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) if (bo->ptr) os_munmap(bo->ptr, bo->base.size); + if (mgr->va) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + } + /* Close object. */ args.handle = bo->handle; drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); @@ -343,6 +477,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, struct radeon_bo *bo; struct drm_radeon_gem_create args; struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc; + int r; memset(&args, 0, sizeof(args)); @@ -375,8 +510,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, bo->mgr = mgr; bo->rws = mgr->rws; bo->handle = args.handle; + bo->va = 0; pipe_mutex_init(bo->map_mutex); + if (mgr->va) { + struct drm_radeon_gem_va va; + + bo->va_size = align(size, 4096); + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); + + va.handle = bo->handle; + va.vm_id = 0; + va.operation = RADEON_VA_MAP; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); + fprintf(stderr, "radeon: size : %d bytes\n", size); + fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment); + fprintf(stderr, "radeon: domains : %d\n", args.initial_domain); + radeon_bo_destroy(&bo->base); + return NULL; + } + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + bo->va = va.offset; + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); + } + } + return &bo->base; } @@ -407,6 +572,7 @@ static void radeon_bomgr_destroy(struct pb_manager *_mgr) struct radeon_bomgr *mgr = radeon_bomgr(_mgr); util_hash_table_destroy(mgr->bo_handles); pipe_mutex_destroy(mgr->bo_handles_mutex); + pipe_mutex_destroy(mgr->bo_va_mutex); FREE(mgr); } @@ -438,6 +604,12 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws) mgr->rws = rws; mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); pipe_mutex_init(mgr->bo_handles_mutex); + pipe_mutex_init(mgr->bo_va_mutex); + + mgr->va = rws->info.r600_virtual_address; + mgr->va_offset = rws->info.r600_va_start; + list_inithead(&mgr->va_holes); + return &mgr->base; } @@ -560,6 +732,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, struct radeon_bo *bo; struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); struct drm_gem_open open_arg = {}; + int r; memset(&open_arg, 0, sizeof(open_arg)); @@ -603,6 +776,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, bo->base.vtbl = &radeon_bo_vtbl; bo->mgr = mgr; bo->rws = mgr->rws; + bo->va = 0; pipe_mutex_init(bo->map_mutex); util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); @@ -613,6 +787,33 @@ done: if (stride) *stride = whandle->stride; + if (mgr->va) { + struct drm_radeon_gem_va va; + + bo->va_size = ((bo->base.size + 4095) & ~4095); + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); + + va.handle = bo->handle; + va.operation = RADEON_VA_MAP; + va.vm_id = 0; + va.offset = bo->va; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to assign virtual address space\n"); + radeon_bo_destroy(&bo->base); + return NULL; + } + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + bo->va = va.offset; + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); + } + } + return (struct pb_buffer*)bo; fail: @@ -649,6 +850,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, return TRUE; } +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) +{ + struct radeon_bo *bo = get_radeon_bo(buffer); + + return bo->va; +} + void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) { ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; @@ -661,4 +869,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_create = radeon_winsys_bo_create; ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; + ws->base.buffer_get_virtual_address = radeon_winsys_bo_va; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index 35d25e87eb3..21cfe995510 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -59,6 +59,8 @@ struct radeon_bo { uint32_t handle; uint32_t name; + uint64_t va; + uint64_t va_size; /* how many command streams is this bo referenced in? */ int num_cs_references; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index e6109afd7ea..cf8f25bc5b4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -71,18 +71,32 @@ #include <stdint.h> #include <xf86drm.h> +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #ifndef RADEON_CHUNK_ID_FLAGS -#define RADEON_CHUNK_ID_FLAGS 0x03 +#define RADEON_CHUNK_ID_FLAGS 0x03 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ #define RADEON_CS_KEEP_TILING_FLAGS 0x01 #endif +#ifndef RADEON_CS_USE_VM +#define RADEON_CS_USE_VM 0x02 +/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */ +#define RADEON_CS_RING_GFX 0 +#define RADEON_CS_RING_COMPUTE 1 +#endif + + #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, + struct radeon_drm_winsys *ws) { - csc->fd = fd; + csc->fd = ws->fd; csc->nrelocs = 512; csc->relocs_bo = (struct radeon_bo**) CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); @@ -157,11 +171,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) cs->ws = ws; - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { FREE(cs); return NULL; } - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { radeon_destroy_cs_context(&cs->csc1); FREE(cs); return NULL; @@ -440,11 +454,15 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } + cs->cst->flags = 0; + cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { + cs->cst->flags |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; - cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS; - } else { - cs->cst->cs.num_chunks = 2; + } + if (cs->ws->info.r600_virtual_address) { + cs->cst->cs.num_chunks = 3; + cs->cst->flags |= RADEON_CS_USE_VM; } if (cs->thread && diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 904000d6933..05b9a487645 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -51,8 +51,8 @@ struct radeon_cs_context { struct drm_radeon_cs_reloc *relocs_hashlist[256]; unsigned reloc_indices_hashlist[256]; - unsigned used_vram; - unsigned used_gart; + unsigned used_vram; + unsigned used_gart; }; struct radeon_drm_cs { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index f337411e223..051a390ed22 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -41,6 +41,11 @@ #include <xf86drm.h> #include <stdio.h> +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #ifndef RADEON_INFO_TILING_CONFIG #define RADEON_INFO_TILING_CONFIG 6 #endif @@ -69,6 +74,14 @@ #define RADEON_INFO_BACKEND_MAP 0xd #endif +#ifndef RADEON_INFO_VA_START +/* virtual address start, va < start are reserved by the kernel */ +#define RADEON_INFO_VA_START 0x0e +/* maximum size of ib using the virtual memory cs */ +#define RADEON_INFO_IB_VM_MAX_SIZE 0x0f +#endif + + /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. * Otherwise, return FALSE. @@ -265,6 +278,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.r600_backend_map)) ws->info.r600_backend_map_valid = TRUE; } + ws->info.r600_virtual_address = FALSE; + if (ws->info.drm_minor >= 13) { + ws->info.r600_virtual_address = TRUE; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, + &ws->info.r600_va_start)) + ws->info.r600_virtual_address = FALSE; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, + &ws->info.r600_ib_vm_max_size)) + ws->info.r600_virtual_address = FALSE; + } } return TRUE; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 59c1aad3308..d33eaa7059c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -103,6 +103,9 @@ struct radeon_info { uint32_t r600_num_tile_pipes; uint32_t r600_backend_map; boolean r600_backend_map_valid; + boolean r600_virtual_address; + uint32_t r600_va_start; + uint32_t r600_ib_vm_max_size; }; enum radeon_feature_id { @@ -250,6 +253,14 @@ struct radeon_winsys { unsigned stride, struct winsys_handle *whandle); + /** + * Return the virtual address of a buffer. + * + * \param buf A winsys buffer object + * \return virtual address + */ + uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); + /************************************************************************** * Command submission. * |