diff options
Diffstat (limited to 'src/gallium/drivers/r600')
23 files changed, 1483 insertions, 835 deletions
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index a484f38e9f1..b476b9af3b8 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ evergreen_state.c \ eg_asm.c \ r600_translate.c \ - r600_state_common.c + r600_state_common.c \ + r600_upload.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 3fc1fa94c27..64980140963 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -28,6 +28,7 @@ r600 = env.ConvenienceLibrary( 'r600_state_common.c', 'r600_texture.c', 'r600_translate.c', + 'r600_upload.c', 'r700_asm.c', 'evergreen_state.c', 'eg_asm.c', diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index c30f09c394b..b79875c7c75 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -27,6 +27,7 @@ #include "r600_asm.h" #include "eg_sq.h" #include "r600_opcodes.h" +#include "evergreend.h" int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -74,6 +75,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | @@ -87,3 +90,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) } return 0; } + +void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 8 - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(ve->fetch_shader)) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 59641976403..ecea1db4f15 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -290,6 +290,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_R8_UNORM: @@ -311,6 +312,8 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_SWAP_STD; + case PIPE_FORMAT_L8A8_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -400,6 +403,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_COLOR_16; + case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; @@ -447,8 +451,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16_16; /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_028C70_COLOR_16_16_16_16; @@ -460,6 +466,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_FLOAT: return V_028C70_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_USCALED: case PIPE_FORMAT_R32G32_SSCALED: return V_028C70_COLOR_32_32; @@ -634,38 +641,4 @@ out_unknown: return ~0; } -static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - unsigned i; - uint32_t word3; - - assert(format); - - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - fprintf(stderr, "r600: Bad format %s in %s:%d\n", - util_format_short_name(format), __FUNCTION__, __LINE__); - return 0; - } - - word3 = 0; - for (i = 0; i < desc->nr_channels; i++) { - switch (i) { - case 0: - word3 |= S_03000C_DST_SEL_X(desc->swizzle[0]); - break; - case 1: - word3 |= S_03000C_DST_SEL_Y(desc->swizzle[1]); - break; - case 2: - word3 |= S_03000C_DST_SEL_Z(desc->swizzle[2]); - break; - case 3: - word3 |= S_03000C_DST_SEL_W(desc->swizzle[3]); - break; - } - } - return word3; -} - #endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4725b5e75e2..07496ebf51e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -323,11 +323,11 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); /* FIXME LOD it depends on texture base level ... */ r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)), + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | + S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -410,9 +410,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - S_030014_LAST_LEVEL(state->last_level) | + S_030014_LAST_LEVEL(state->u.tex.last_level) | S_030014_BASE_ARRAY(0) | S_030014_LAST_ARRAY(0), 0xffffffff, NULL); r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x0, 0xFFFFFFFF, NULL); @@ -431,7 +431,7 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); } } } @@ -501,16 +501,16 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_CLIP; for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, - R_0285BC_PA_CL_UCP0_X + i * 4, + R_0285BC_PA_CL_UCP0_X + i * 16, fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C0_PA_CL_UCP0_Y + i * 4, + R_0285C0_PA_CL_UCP0_Y + i * 16, fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C4_PA_CL_UCP0_Z + i * 4, + R_0285C4_PA_CL_UCP0_Z + i * 16, fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C8_PA_CL_UCP0_W + i * 4, + R_0285C8_PA_CL_UCP0_W + i * 16, fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, @@ -633,10 +633,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level = state->cbufs[cb]->level; + unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; + unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -647,6 +648,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; @@ -660,13 +664,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) color_info |= S_028C70_SOURCE_FORMAT(1); /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL); @@ -698,11 +702,12 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state struct r600_surface *surf; unsigned level; unsigned pitch, slice, format, stencil_format; + unsigned offset; if (state->zsbuf == NULL) return; - level = state->zsbuf->level; + level = state->zsbuf->u.tex.level; surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; @@ -712,24 +717,27 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rtex->depth = 1; rbuffer = &rtex->resource; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, + level, state->zsbuf->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); if (stencil_format) { uint32_t stencil_offset; stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); } r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); @@ -825,6 +833,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); + + if (state->zsbuf) { + evergreen_polygon_offset_update(rctx); + } } static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, @@ -1036,11 +1048,33 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 85; num_ls_stack_entries = 85; break; + case CHIP_PALM: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } tmp = 0x00000000; switch (family) { case CHIP_CEDAR: + case CHIP_PALM: break; default: tmp |= S_008C00_VC_ENABLE(1); @@ -1172,29 +1206,101 @@ void evergreen_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); -r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, - 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); r600_context_pipe_state_set(&rctx->ctx, rstate); } -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state state; + + state.id = R600_PIPE_STATE_POLYGON_OFFSET; + state.nregs = 0; + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + /* FIXME some of those reg can be computed with cso */ + offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&state, + R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &state); + } +} + +static void evergreen_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate; struct r600_resource *rbuffer; - unsigned i, j, offset, prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; struct pipe_vertex_buffer *vertex_buffer; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw; - boolean translate = FALSE; + unsigned i, offset; + + /* we don't update until we know vertex elements */ + if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) + return; if (rctx->vertex_elements->incompatible_layout) { + /* translate rebind new vertex elements so + * return once translated + */ r600_begin_vertex_translate(rctx); - translate = TRUE; + return; } if (rctx->any_user_vbs) { @@ -1202,6 +1308,72 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) rctx->any_user_vbs = FALSE; } + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + rctx->nvs_resource = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + rctx->nvs_resource = rctx->nvertex_buffer; + } + + for (i = 0 ; i < rctx->nvs_resource; i++) { + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->vbuffer_offset[i] + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vertex_buffer[i]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + } + + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(vertex_buffer->stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); + evergreen_fs_resource_set(&rctx->ctx, rstate, i); + } +} + +int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw; + unsigned prim; + memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -1250,45 +1422,23 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } if (r600_conv_pipe_prim(draw.mode, &prim)) return; - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing vertex shader\n"); return; - if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - uint32_t word3, word2; - uint32_t format; - rstate = &rctx->vs_resource[i]; - - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - - format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); - - word2 = format | S_030008_STRIDE(vertex_buffer->stride); - - word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->hw_format[i]); - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, word3, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_vs_resource_set(&rctx->ctx, rstate, i); } + evergreen_spi_update(rctx); + mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { mask |= (0xF << (i * 4)); @@ -1303,46 +1453,6 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - - if (rctx->rasterizer && rctx->framebuffer.zsbuf) { - float offset_units = rctx->rasterizer->offset_units; - unsigned offset_db_fmt_cntl = 0, depth; - - switch (rctx->framebuffer.zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - return; - } - offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, - R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - } r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw.count; @@ -1357,28 +1467,22 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } evergreen_context_draw(&rctx->ctx, &rdraw); - if (translate) - r600_end_vertex_translate(rctx); - pipe_resource_reference(&draw.index_buffer, NULL); } void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; unsigned spi_baryc_cntl; - /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); /* evergreen NUM_INTERP only contains values interpolated into the LDS, POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) @@ -1396,16 +1500,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader if (rshader->input[i].centroid) have_centroid = TRUE; } - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) @@ -1464,8 +1558,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_0286E0_PERSP_CENTROID_ENA(have_centroid); if (have_linear) spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(have_centroid); - + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, @@ -1544,14 +1638,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader R_028864_SQ_PGM_RESOURCES_2_VS, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index a337916c098..e67254b2560 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -45,6 +45,15 @@ #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 17858b2d381..aa456d493f7 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -91,6 +91,7 @@ enum radeon_family { CHIP_JUNIPER, CHIP_CYPRESS, CHIP_HEMLOCK, + CHIP_PALM, CHIP_LAST, }; @@ -264,6 +265,7 @@ void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); @@ -284,10 +286,14 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); +struct radeon *radeon_decref(struct radeon *radeon); + #endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 8a7f3ce575c..1f41269534a 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -22,11 +22,15 @@ */ #include <stdio.h> #include <errno.h> +#include "util/u_format.h" #include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" #include "r600_asm.h" +#include "r600_formats.h" +#include "r600d.h" static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) { @@ -55,8 +59,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: - return 2; - + return 2; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: @@ -74,7 +78,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); }; - + return 3; } @@ -137,20 +141,21 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_RV635: case CHIP_RS780: case CHIP_RS880: - bc->chiprev = 0; + bc->chiprev = CHIPREV_R600; break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - bc->chiprev = 1; + bc->chiprev = CHIPREV_R700; break; case CHIP_CEDAR: case CHIP_REDWOOD: case CHIP_JUNIPER: case CHIP_CYPRESS: case CHIP_HEMLOCK: - bc->chiprev = 2; + case CHIP_PALM: + bc->chiprev = CHIPREV_EVERGREEN; break; default: R600_ERR("unknown family %d\n", bc->family); @@ -199,9 +204,9 @@ const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 SQ_ALU_VEC_012}; //111 const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //001 SQ_ALU_SCL_122, //010 - + SQ_ALU_SCL_221, //011 SQ_ALU_SCL_212, //100 SQ_ALU_SCL_122, //101 @@ -592,10 +597,34 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | - S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | - S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + unsigned fetch_resource_start = 0; + + /* check if we are fetch shader */ + /* fetch shader can also access vertex resource, + * first fetch shader resource is at 160 + */ + if (bc->type == -1) { + switch (bc->chiprev) { + /* r600 */ + case CHIPREV_R600: + /* r700 */ + case CHIPREV_R700: + fetch_resource_start = 160; + break; + /* evergreen */ + case CHIPREV_EVERGREEN: + fetch_resource_start = 0; + break; + default: + fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", + __FILE__, __func__, __LINE__, bc->chiprev); + break; + } + } + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | @@ -678,8 +707,8 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | - S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | - S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); + S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | + S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } if (alu->last) { if (alu->nliteral && !alu->literal_added) { @@ -710,7 +739,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: @@ -742,6 +771,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | @@ -766,7 +797,10 @@ int r600_bc_build(struct r600_bc *bc) int r; if (bc->callstack[0].max > 0) - bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; + bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; + if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { + bc->nstack = 1; + } /* first path compute addr of each CF block */ /* addr start after all the CF instructions */ @@ -795,6 +829,8 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -810,7 +846,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev == 2) + if (bc->chiprev == CHIPREV_EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -821,11 +857,11 @@ int r600_bc_build(struct r600_bc *bc) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { switch(bc->chiprev) { - case 0: + case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); break; - case 1: - case 2: /* eg alu is same encoding as r700 */ + case CHIPREV_R700: + case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -868,6 +904,8 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -912,3 +950,342 @@ void r600_bc_clear(struct r600_bc *bc) LIST_INITHEAD(&cf->list); } + +void r600_bc_dump(struct r600_bc *bc) +{ + unsigned i; + char chip = '6'; + + switch (bc->chiprev) { + case 1: + chip = '7'; + break; + case 2: + chip = 'E'; + break; + case 0: + default: + chip = '6'; + break; + } + fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw); + fprintf(stderr, " %c\n", chip); + for (i = 0; i < bc->ndw; i++) { + fprintf(stderr, "0x%08X\n", bc->bytecode[i]); + } + fprintf(stderr, "--------------------------------------\n"); +} + +void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 8 - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + +void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +{ + struct r600_pipe_state *rstate; + unsigned i = 0; + + if (count > 8) { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(8 - 1); + bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((count - 8) - 1); + } else { + bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count - 1); + } + bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); + bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | + S_SQ_CF_WORD1_BARRIER(1); + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + +static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, + unsigned *num_format, unsigned *format_comp) +{ + const struct util_format_description *desc; + unsigned i; + + *format = 0; + *num_format = 0; + *format_comp = 0; + + desc = util_format_description(pformat); + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { + goto out_unknown; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + switch (desc->channel[i].type) { + /* Half-floats, floats, doubles */ + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[i].size) { + case 16: + switch (desc->nr_channels) { + case 1: + *format = FMT_16_FLOAT; + break; + case 2: + *format = FMT_16_16_FLOAT; + break; + case 3: + *format = FMT_16_16_16_FLOAT; + break; + case 4: + *format = FMT_16_16_16_16_FLOAT; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + *format = FMT_32_FLOAT; + break; + case 2: + *format = FMT_32_32_FLOAT; + break; + case 3: + *format = FMT_32_32_32_FLOAT; + break; + case 4: + *format = FMT_32_32_32_32_FLOAT; + break; + } + break; + default: + goto out_unknown; + } + break; + /* Unsigned ints */ + case UTIL_FORMAT_TYPE_UNSIGNED: + /* Signed ints */ + case UTIL_FORMAT_TYPE_SIGNED: + switch (desc->channel[i].size) { + case 8: + switch (desc->nr_channels) { + case 1: + *format = FMT_8; + break; + case 2: + *format = FMT_8_8; + break; + case 3: + // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ + // break; + case 4: + *format = FMT_8_8_8_8; + break; + } + break; + case 16: + switch (desc->nr_channels) { + case 1: + *format = FMT_16; + break; + case 2: + *format = FMT_16_16; + break; + case 3: + // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ + // break; + case 4: + *format = FMT_16_16_16_16; + break; + } + break; + case 32: + switch (desc->nr_channels) { + case 1: + *format = FMT_32; + break; + case 2: + *format = FMT_32_32; + break; + case 3: + *format = FMT_32_32_32; + break; + case 4: + *format = FMT_32_32_32_32; + break; + } + break; + default: + goto out_unknown; + } + break; + default: + goto out_unknown; + } + + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + *format_comp = 1; + } + if (desc->channel[i].normalized) { + *num_format = 0; + } else { + *num_format = 2; + } + return; +out_unknown: + R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); +} + +static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode) +{ + unsigned i; + char chip = '6'; + + switch (chiprev) { + case 1: + chip = '7'; + break; + case 2: + chip = 'E'; + break; + case 0: + default: + chip = '6'; + break; + } + fprintf(stderr, "bytecode %d dw -----------------------\n", ndw); + fprintf(stderr, " %c\n", chip); + for (i = 0; i < ndw; i++) { + fprintf(stderr, "0x%08X\n", bytecode[i]); + } + fprintf(stderr, "--------------------------------------\n"); +} + +int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) +{ + unsigned ndw, i; + u32 *bytecode; + unsigned fetch_resource_start = 0, format, num_format, format_comp; + struct pipe_vertex_element *elements = ve->elements; + const struct util_format_description *desc; + + /* 2 dwords for cf aligned to 4 + 4 dwords per input */ + ndw = 8 + ve->count * 4; + ve->fs_size = ndw * 4; + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + return -ENOMEM; + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + if (rctx->family >= CHIP_CEDAR) { + eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); + } else { + r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); + fetch_resource_start = 160; + } + + /* vertex elements offset need special handling, if offset is bigger + * than what we can put in fetch instruction then we need to alterate + * the vertex resource offset. In such case in order to simplify code + * we will bound one resource per elements. It's a worst case scenario. + */ + for (i = 0; i < ve->count; i++) { + ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset; + if (ve->vbuffer_offset[i]) { + ve->vbuffer_need_offset = 1; + } + } + + for (i = 0; i < ve->count; i++) { + unsigned vbuffer_index; + r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); + desc = util_format_description(ve->hw_format[i]); + if (desc == NULL) { + R600_ERR("unknown format %d\n", ve->hw_format[i]); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -EINVAL; + } + + /* see above for vbuffer_need_offset explanation */ + vbuffer_index = elements[i].vertex_buffer_index; + if (ve->vbuffer_need_offset) { + bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); + } else { + bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); + } + bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | + S_SQ_VTX_WORD0_SRC_SEL_X(0) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); + bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | + S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | + S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | + S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | + S_SQ_VTX_WORD1_DATA_FORMAT(format) | + S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | + S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | + S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); + bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); + bytecode[8 + i * 4 + 3] = 0; + } + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + return 0; +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 25cda16837d..b147f0f5c88 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -28,6 +28,9 @@ #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 +struct r600_vertex_element; +struct r600_pipe_context; + struct r600_bc_alu_src { unsigned sel; unsigned chan; @@ -145,12 +148,12 @@ struct r600_bc_cf { struct r600_bc_alu *curr_bs_head; }; -#define FC_NONE 0 -#define FC_IF 1 -#define FC_LOOP 2 -#define FC_REP 3 -#define FC_PUSH_VPM 4 -#define FC_PUSH_WQM 5 +#define FC_NONE 0 +#define FC_IF 1 +#define FC_LOOP 2 +#define FC_REP 3 +#define FC_PUSH_VPM 4 +#define FC_PUSH_WQM 5 struct r600_cf_stack_entry { int type; @@ -166,10 +169,11 @@ struct r600_cf_callstack { int current; int max; }; - + struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + int type; struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; @@ -187,6 +191,7 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); +void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); @@ -199,6 +204,11 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_dump(struct r600_bc *bc); +void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); +void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); + +int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 50d47060c1a..0f04136fb2a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -27,9 +27,9 @@ enum r600_blitter_op /* bitmask */ { - R600_CLEAR = 1, - R600_CLEAR_SURFACE = 2, - R600_COPY = 4 + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 }; static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) @@ -81,16 +81,21 @@ static void r600_blitter_end(struct pipe_context *ctx) int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_surface *zsurf, *cbsurf; + struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; + surf_tmpl.format = texture->resource.base.b.format; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, - PIPE_BIND_DEPTH_STENCIL); + zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); - cbsurf = ctx->screen->get_tex_surface(ctx->screen, - (struct pipe_resource*)texture->flushed_depth_texture, - 0, level, 0, PIPE_BIND_RENDER_TARGET); + surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + cbsurf = ctx->create_surface(ctx, + (struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl); if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) @@ -154,42 +159,38 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, /* Copy a block of pixels from one surface to another using HW. */ static void r600_hw_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - struct pipe_subresource subdst, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height, - TRUE); + util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); r600_blitter_end(ctx); } static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, - struct pipe_subresource subdst, + unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, - struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) + unsigned src_level, + const struct pipe_box *src_box) { boolean is_depth; /* there is something wrong with depth resource copies at the moment so avoid them for now */ is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; if (is_depth) - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); else - r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); - + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index ed97b6e69a3..a17c54d6eeb 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,7 +29,6 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include <util/u_upload_mgr.h> #include "state_tracker/drm_driver.h" #include <xf86drm.h> #include "radeon_drm.h" @@ -53,12 +52,13 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->magic = R600_BUFFER_MAGIC; rbuffer->user_buffer = NULL; - rbuffer->num_ranges = 0; rbuffer->r.base.b = *templ; pipe_reference_init(&rbuffer->r.base.b.reference, 1); rbuffer->r.base.b.screen = screen; rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; + rbuffer->r.bo_size = rbuffer->r.size; + rbuffer->uploaded = FALSE; bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); if (bo == NULL) { FREE(rbuffer); @@ -89,10 +89,12 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.base.b.width0 = bytes; rbuffer->r.base.b.height0 = 1; rbuffer->r.base.b.depth0 = 1; + rbuffer->r.base.b.array_size = 1; rbuffer->r.base.b.flags = 0; - rbuffer->num_ranges = 0; rbuffer->r.bo = NULL; + rbuffer->r.bo_size = 0; rbuffer->user_buffer = ptr; + rbuffer->uploaded = FALSE; return &rbuffer->r.base.b; } @@ -104,6 +106,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } + rbuffer->r.bo = NULL; FREE(rbuffer); } @@ -113,29 +116,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); int write = 0; uint8_t *data; - int i; - boolean flush = FALSE; if (rbuffer->user_buffer) return (uint8_t*)rbuffer->user_buffer + transfer->box.x; - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuffer->num_ranges; i++) { - if ((transfer->box.x >= rbuffer->ranges[i].start) && - (transfer->box.x < rbuffer->ranges[i].end)) - flush = TRUE; - - if (flush) { - r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); - rbuffer->num_ranges = 0; - rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, - rbuffer->r.base.b.width0, 0, - rbuffer->r.base.b.bind, - rbuffer->r.base.b.usage); - break; - } - } - } if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ } @@ -154,41 +138,22 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + if (rbuffer->user_buffer) + return; + if (rbuffer->r.bo) r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; - - assert(box->x + box->width <= transfer->box.width); - - if (rbuffer->user_buffer) - return; - - /* mark the range as used */ - for(i = 0; i < rbuffer->num_ranges; ++i) { - if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) { - rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset); - rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length)); - return; - } - } - - rbuffer->ranges[rbuffer->num_ranges].start = offset; - rbuffer->ranges[rbuffer->num_ranges].end = offset+length; - rbuffer->num_ranges++; } unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, - unsigned face, unsigned level) + unsigned level, int layer) { /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; @@ -235,29 +200,25 @@ struct u_resource_vtbl r600_buffer_vtbl = int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = draw->index_buffer_offset; - int ret = 0; - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - ret = u_upload_buffer(rctx->upload_ib, - index_offset, - draw->count * draw->index_size, - draw->index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - draw->index_buffer_offset = index_offset; - - /* Transfer ownership. */ - pipe_resource_reference(&draw->index_buffer, upload_buffer); - pipe_resource_reference(&upload_buffer, NULL); + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + unsigned upload_offset; + int ret = 0; + + ret = r600_upload_buffer(rctx->rupload_vb, + draw->index_buffer_offset, + draw->count * draw->index_size, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); + if (ret) + return ret; + rbuffer->uploaded = TRUE; + draw->index_buffer_offset = upload_offset; } -done: - return ret; + return 0; } int r600_upload_user_buffers(struct r600_pipe_context *rctx) @@ -266,25 +227,24 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx) int i, nr; nr = rctx->vertex_elements->count; + nr = rctx->nvertex_buffer; for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index]; + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; if (r600_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; + struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer); unsigned upload_offset; - ret = u_upload_buffer(rctx->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); + + ret = r600_upload_buffer(rctx->rupload_vb, + 0, vb->buffer->width0, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); if (ret) return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; + rbuffer->uploaded = TRUE; vb->buffer_offset = upload_offset; } } diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 4f9b39a7fdc..2ee0c83e5d3 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -385,8 +385,13 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C -#define BC_INST(bc, x) ((bc)->chiprev == 2 ? EG_##x : x) -#define CTX_INST(x) (ctx->bc->chiprev == 2 ? EG_##x : x) +#define CHIPREV_R600 0 +#define CHIPREV_R700 1 +#define CHIPREV_EVERGREEN 2 + +#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) + +#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2a113f0ea8d..72988b946e5 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -35,7 +35,6 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -59,9 +58,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, if (!rctx->ctx.pm4_cdwords) return; - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -71,6 +67,8 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif r600_context_flush(&rctx->ctx); + + r600_upload_flush(rctx->rupload_vb); } static void r600_destroy_context(struct pipe_context *context) @@ -79,6 +77,8 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); + r600_end_vertex_translate(rctx); + r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -87,8 +87,7 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); + r600_upload_destroy(rctx->rupload_vb); if (rctx->tran.translate_cache) translate_cache_destroy(rctx->tran.translate_cache); @@ -120,6 +119,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_blit_functions(rctx); r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); + r600_init_surface_functions(rctx); switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -147,6 +147,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_JUNIPER: case CHIP_CYPRESS: case CHIP_HEMLOCK: + case CHIP_PALM: rctx->context.draw_vbo = evergreen_draw; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { @@ -161,16 +162,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } - - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { + rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16); + if (rctx->rupload_vb == NULL) { r600_destroy_context(&rctx->context); return NULL; } @@ -186,7 +179,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void FREE(rctx); return NULL; } - + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); @@ -238,6 +231,7 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_JUNIPER: return "AMD JUNIPER"; case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; + case CHIP_PALM: return "AMD PALM"; default: return "AMD unknown"; } } @@ -252,6 +246,9 @@ static const char* r600_get_name(struct pipe_screen* pscreen) static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); + switch (param) { /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: @@ -284,7 +281,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 14; + if (family >= CHIP_CEDAR) + return 15; + else + return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ return 16; @@ -313,12 +313,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) { + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + enum radeon_family family = r600_get_family(rscreen->radeon); + switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: case PIPE_CAP_MAX_LINE_WIDTH_AA: case PIPE_CAP_MAX_POINT_WIDTH: case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 8192.0f; + if (family >= CHIP_CEDAR) + return 16384.0f; + else + return 8192.0f; case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: @@ -375,6 +381,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; default: return 0; } @@ -403,10 +411,10 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, } if ((usage & (PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | @@ -463,7 +471,6 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.get_paramf = r600_get_paramf; rscreen->screen.is_format_supported = r600_is_format_supported; rscreen->screen.context_create = r600_create_context; - r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 1c691f6b764..43dbee99b0f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -53,6 +53,8 @@ enum r600_pipe_state_id { R600_PIPE_STATE_CONSTANT, R600_PIPE_STATE_SAMPLER, R600_PIPE_STATE_RESOURCE, + R600_PIPE_STATE_POLYGON_OFFSET, + R600_PIPE_STATE_FETCH_SHADER, R600_PIPE_NSTATES }; @@ -86,13 +88,22 @@ struct r600_vertex_element struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + boolean incompatible_layout; + struct r600_bo *fetch_shader; + unsigned fs_size; + struct r600_pipe_state rstate; + /* if offset is to big for fetch instructio we need to alterate + * offset of vertex buffer, record here the offset need to add + */ + unsigned vbuffer_need_offset; + unsigned vbuffer_offset[PIPE_MAX_ATTRIBS]; }; struct r600_pipe_shader { struct r600_shader shader; struct r600_pipe_state rstate; struct r600_bo *bo; + struct r600_bo *bo_fetch; struct r600_vertex_element vertex_elements; }; @@ -100,25 +111,28 @@ struct r600_pipe_shader { #define NUM_TEX_UNITS 16 struct r600_textures_info { - struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; - unsigned n_views; + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + unsigned n_views; void *samplers[NUM_TEX_UNITS]; - unsigned n_samplers; + unsigned n_samplers; }; +/* vertex buffer translation context, used to translate vertex input that + * hw doesn't natively support, so far only FLOAT64 is unsupported. + */ struct r600_translate_context { /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - + struct translate_cache *translate_cache; /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; + unsigned vb_slot; + void *new_velems; }; #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 +struct r600_upload; + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -139,6 +153,7 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; + unsigned nvs_resource; struct r600_pipe_state *vs_resource; struct r600_pipe_state *ps_resource; struct r600_pipe_state config; @@ -150,14 +165,11 @@ struct r600_pipe_context { /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; + struct r600_upload *rupload_vb; unsigned any_user_vbs; - struct r600_textures_info ps_samplers; - - unsigned vb_max_index; - struct r600_translate_context tran; - + struct r600_textures_info ps_samplers; + unsigned vb_max_index; + struct r600_translate_context tran; }; struct r600_drawl { @@ -180,6 +192,8 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); +void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); +void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); @@ -193,7 +207,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, unsigned bind); unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, - unsigned face, unsigned level); + unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); @@ -206,7 +220,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); +int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, @@ -217,14 +231,20 @@ void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void r600_init_config(struct r600_pipe_context *rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); +void r600_polygon_offset_update(struct r600_pipe_context *rctx); +void r600_vertex_buffer_update(struct r600_pipe_context *rctx); + /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); +void r600_init_surface_functions(struct r600_pipe_context *r600); uint32_t r600_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); +unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, + unsigned level, unsigned layer); /* r600_translate.c */ void r600_begin_vertex_translate(struct r600_pipe_context *rctx); @@ -251,13 +271,13 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, void r600_bind_state(struct pipe_context *ctx, void *state); void r600_delete_state(struct pipe_context *ctx, void *state); void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); - void *r600_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state); void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); + /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 7a2d1f44122..8ca27699206 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -46,6 +46,7 @@ struct r600_resource { struct u_resource base; struct r600_bo *bo; u32 size; + unsigned bo_size; }; struct r600_resource_texture { @@ -61,7 +62,21 @@ struct r600_resource_texture { unsigned tile_type; unsigned depth; unsigned dirty; - struct r600_resource_texture *flushed_depth_texture; + struct r600_resource_texture *flushed_depth_texture; +}; + +#define R600_BUFFER_MAGIC 0xabcd1600 + +struct r600_resource_buffer { + struct r600_resource r; + uint32_t magic; + void *user_buffer; + bool uploaded; +}; + +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; }; void r600_init_screen_resource_functions(struct pipe_screen *screen); @@ -73,46 +88,30 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *base, struct winsys_handle *whandle); -#define R600_BUFFER_MAGIC 0xabcd1600 -#define R600_BUFFER_MAX_RANGES 32 - -struct r600_buffer_range { - uint32_t start; - uint32_t end; -}; - -struct r600_resource_buffer { - struct r600_resource r; - uint32_t magic; - void *user_buffer; - struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* r600_buffer */ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer) { if (buffer) { assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC); return (struct r600_resource_buffer *)buffer; - } - return NULL; + } + return NULL; } static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; + if (r600_buffer(buffer)->uploaded) + return FALSE; + return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; } -int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture); - -extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); +int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box); void r600_texture_transfer_destroy(struct pipe_context *ctx, @@ -122,9 +121,15 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -struct r600_surface { - struct pipe_surface base; - unsigned aligned_height; -}; +struct r600_pipe_context; +struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, + unsigned default_size, + unsigned alignment); +void r600_upload_flush(struct r600_upload *upload); +void r600_upload_destroy(struct r600_upload *upload); +int r600_upload_buffer(struct r600_upload *upload, unsigned offset, + unsigned size, struct r600_resource_buffer *in_buffer, + unsigned *out_offset, unsigned *out_size, + struct r600_bo **out_buffer); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f6153c0e80f..9c7b7f0a578 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -44,6 +44,9 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade rstate->nregs = 0; /* so far never got proper semantic id from tgsi */ + /* FIXME better to move this in config things so they get emited + * only one time per cs + */ for (i = 0; i < 10; i++) { spi_vs_out_id[i] = 0; } @@ -67,20 +70,11 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade S_028868_STACK_SIZE(rshader->bc.nstack), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288D0_SQ_PGM_CF_OFFSET_VS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028858_SQ_PGM_START_VS, r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_028894_SQ_PGM_START_FS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -104,37 +98,20 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; int pos_index = -1, face_index = -1; - /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); - } if (rshader->input[i].name == TGSI_SEMANTIC_FACE) face_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } + for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) r600_pipe_state_add_reg(rstate, @@ -210,7 +187,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade 0xFFFFFFFF, NULL); } -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; @@ -227,7 +204,6 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s r600_bo_unmap(rctx->radeon, shader->bo); } /* build state */ - rshader->flat_shade = rctx->flatshade; switch (rshader->processor_type) { case TGSI_PROCESSOR_VERTEX: if (rshader->family >= CHIP_CEDAR) { @@ -246,75 +222,9 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s default: return -EINVAL; } - r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); return 0; } -static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_shader *shader = &rshader->shader; - const struct util_format_description *desc; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc; - struct r600_bc_cf *cf; - struct r600_bc_vtx *vtx; - - if (shader->processor_type != TGSI_PROCESSOR_VERTEX) - return 0; - /* doing a full memcmp fell over the refcount */ - if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && - (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { - return 0; - } - rshader->vertex_elements = *rctx->vertex_elements; - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; - } - r600_bo_reference(rctx->radeon, &rshader->bo, NULL); - LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { - switch (cf->inst) { - case V_SQ_CF_WORD1_SQ_CF_INST_VTX: - case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - desc = util_format_description(resource_format[vtx->buffer_id]); - if (desc == NULL) { - R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); - return -EINVAL; - } - vtx->dst_sel_x = desc->swizzle[0]; - vtx->dst_sel_y = desc->swizzle[1]; - vtx->dst_sel_z = desc->swizzle[2]; - vtx->dst_sel_w = desc->swizzle[3]; - } - break; - default: - break; - } - } - return r600_bc_build(&shader->bc); -} - -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - int r; - - if (shader == NULL) - return -EINVAL; - /* there should be enough input */ - if (rctx->vertex_elements->count < shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, shader->shader.bc.nresource); - return -EINVAL; - } - r = r600_shader_update(ctx, shader); - if (r) - return r; - return r600_pipe_shader(ctx, shader); -} - int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { @@ -334,20 +244,17 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("building bytecode failed !\n"); return r; } +//r600_bc_dump(&shader->shader.bc); //fprintf(stderr, "______________________________________________________________\n"); - return 0; + return r600_pipe_shader(ctx, shader); } -void -r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_bo_reference(rctx->radeon, &shader->bo, NULL); - r600_bc_clear(&shader->shader.bc); - - /* FIXME: is there more stuff to free? */ } /* @@ -444,7 +351,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) if (ctx->shader->input[input].centroid) ij_index++; } - + /* work out gpr and base_chan from index */ gpr = ij_index / 2; base_chan = (2 * (ij_index % 2)) + 1; @@ -477,15 +384,13 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) return r; } return 0; -} - - +} + + static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; - struct r600_bc_vtx vtx; unsigned i; - int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -495,27 +400,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - /* turn input into fetch */ - memset(&vtx, 0, sizeof(struct r600_bc_vtx)); - vtx.inst = 0; - vtx.fetch_type = 0; - vtx.buffer_id = i; - /* register containing the index into the buffer */ - vtx.src_gpr = 0; - vtx.src_sel_x = 0; - vtx.mega_fetch_count = 0x1F; - vtx.dst_gpr = ctx->shader->input[i].gpr; - vtx.dst_sel_x = 0; - vtx.dst_sel_y = 1; - vtx.dst_sel_z = 2; - vtx.dst_sel_w = 3; - vtx.use_const_fields = 1; - r = r600_bc_add_vtx(ctx->bc, &vtx); - if (r) - return r; - } - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -549,7 +434,7 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } -/* +/* * for evergreen we need to scan the shader to find the number of GPRs we need to * reserve for interpolation. * @@ -615,6 +500,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.parse.FullHeader.Processor.Processor; shader->processor_type = ctx.type; + ctx.bc->type = shader->processor_type; /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. @@ -640,8 +526,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; + if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { + r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + } else { + r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -687,7 +578,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s /* reserve first tmp for everyone */ r600_get_temp(&ctx); opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if (ctx.bc->chiprev == 2) + if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -1001,7 +892,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - + alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -1046,7 +937,7 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) return tgsi_op2_s(ctx, 1); } -/* +/* * r600 - trunc to -PI..PI range * r700 - normalize by dividing by 2PI * see fdo bug 27901 @@ -1058,7 +949,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, int r; uint32_t lit_vals[4]; struct r600_bc_alu alu; - + memset(lit_vals, 0, 4*4); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1067,10 +958,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); lit_vals[1] = fui(0.5f); @@ -1084,7 +971,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[0] = r600_src[0]; alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; @@ -1099,7 +986,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - + alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1111,7 +998,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == 0) { + if (ctx->bc->chiprev == CHIPREV_R600) { lit_vals[0] = fui(3.1415926535897f * 2.0f); lit_vals[1] = fui(-3.1415926535897f); } else { @@ -1129,7 +1016,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; @@ -1564,7 +1451,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) return r; /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); if (r) return r; @@ -1908,10 +1795,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x - * muladd has no writemask, have to use another temp + * muladd has no writemask, have to use another temp */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); @@ -1921,7 +1808,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 2; - + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; @@ -1941,7 +1828,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].chan = 1; alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 2; - + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; @@ -1980,7 +1867,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } src_gpr = ctx->temp_reg; } - + opcode = ctx->inst_info->r600_opcode; if (opcode == SQ_TEX_INST_SAMPLE && (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) @@ -1990,8 +1877,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.resource_id = tex.sampler_id; - if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) - tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -2026,7 +1911,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* add shadow ambient support - gallium doesn't do it yet */ return 0; - } static int tgsi_lrp(struct r600_shader_ctx *ctx) @@ -2156,7 +2040,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - } + } if (use_temp) return tgsi_helper_copy(ctx, inst); return 0; @@ -2342,7 +2226,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; } - + /* result.y = tmp - floor(tmp); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2621,16 +2505,25 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } -/* r6/7 only for now */ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + break; + case TGSI_OPCODE_ARR: + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + break; + default: + assert(0); + return -1; + } + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) return r; @@ -2663,18 +2556,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) int r; memset(&alu, 0, sizeof(struct r600_bc_alu)); - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; - break; - case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; - break; - default: - assert(0); - return -1; - } - + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + break; + case TGSI_OPCODE_ARR: + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + break; + default: + assert(0); + return -1; + } + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) @@ -2703,8 +2596,8 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - - if (i == 0 || i == 3) { + + if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); @@ -2750,7 +2643,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; - + alu.last = 1; r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); @@ -2763,6 +2656,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) { r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); ctx->bc->cf_last->pop_count = pops; + ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; return 0; } @@ -2804,7 +2698,7 @@ static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned r ctx->bc->callstack[ctx->bc->call_sp].current + diff; } return; - } + } switch (reason) { case FC_PUSH_VPM: ctx->bc->callstack[ctx->bc->call_sp].current++; @@ -2878,7 +2772,7 @@ static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_v static void emit_testflag(struct r600_shader_ctx *ctx) { - + } static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) @@ -3239,7 +3133,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index f8bc5951395..35b0331525a 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -38,7 +38,6 @@ struct r600_shader_io { struct r600_shader { unsigned processor_type; struct r600_bc bc; - boolean flat_shade; unsigned ninput; unsigned noutput; unsigned nlds; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 54cc79b1549..cd5f0792d5e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -36,7 +36,6 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> #include <util/u_framebuffer.h> #include <pipebuffer/pb_buffer.h> #include "r600.h" @@ -46,14 +45,164 @@ #include "r600_pipe.h" #include "r600_state_inlines.h" +void r600_polygon_offset_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state state; + + state.id = R600_PIPE_STATE_POLYGON_OFFSET; + state.nregs = 0; + if (rctx->rasterizer && rctx->framebuffer.zsbuf) { + float offset_units = rctx->rasterizer->offset_units; + unsigned offset_db_fmt_cntl = 0, depth; + + switch (rctx->framebuffer.zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + return; + } + /* FIXME some of those reg can be computed with cso */ + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + r600_pipe_state_add_reg(&state, + R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, + fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, + fui(offset_units), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&state, + R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + offset_db_fmt_cntl, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &state); + } +} + +/* FIXME optimize away spi update when it's not needed */ +static void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, offset; + + /* we don't update until we know vertex elements */ + if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) + return; + + if (rctx->vertex_elements->incompatible_layout) { + /* translate rebind new vertex elements so + * return once translated + */ + r600_begin_vertex_translate(rctx); + return; + } + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + rctx->nvs_resource = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + rctx->nvs_resource = rctx->nvertex_buffer; + } + + for (i = 0 ; i < rctx->nvs_resource; i++) { + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->vbuffer_offset[i] + + vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vertex_buffer[i]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = vertex_buffer->buffer_offset + + r600_bo_offset(rbuffer->bo); + } + + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(vertex_buffer->stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } +} + static void r600_draw_common(struct r600_drawl *draw) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_pipe_state *rstate; struct r600_resource *rbuffer; - unsigned i, j, offset, prim; + unsigned prim; u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct pipe_vertex_buffer *vertex_buffer; struct r600_draw rdraw; struct r600_pipe_state vgt; @@ -76,42 +225,23 @@ static void r600_draw_common(struct r600_drawl *draw) } if (r600_conv_pipe_prim(draw->mode, &prim)) return; - - - /* rebuild vertex shader if input format changed */ - if (r600_pipe_shader_update(&rctx->context, rctx->vs_shader)) + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing vertex shader\n"); return; - if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader)) + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); return; - - for (i = 0 ; i < rctx->vertex_elements->count; i++) { - uint32_t word2, format; - - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - j = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[j]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->elements[i].src_offset + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - - format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); - - word2 = format | S_038008_STRIDE(vertex_buffer->stride); - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, word2, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); } + r600_spi_update(rctx); + mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { mask |= (0xF << (i * 4)); @@ -126,46 +256,6 @@ static void r600_draw_common(struct r600_drawl *draw) r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - /* build late state */ - if (rctx->rasterizer && rctx->framebuffer.zsbuf) { - float offset_units = rctx->rasterizer->offset_units; - unsigned offset_db_fmt_cntl = 0, depth; - - switch (rctx->framebuffer.zsbuf->texture->format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - depth = -24; - offset_units *= 2.0f; - break; - case PIPE_FORMAT_Z32_FLOAT: - depth = -23; - offset_units *= 1.0f; - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); - break; - case PIPE_FORMAT_Z16_UNORM: - depth = -16; - offset_units *= 4.0f; - break; - default: - return; - } - offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); - r600_pipe_state_add_reg(&vgt, - R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, - fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, - fui(offset_units), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, - R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - offset_db_fmt_cntl, 0xFFFFFFFF, NULL); - } r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw->count; @@ -185,17 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_drawl draw; - boolean translate = FALSE; - if (rctx->vertex_elements->incompatible_layout) { - r600_begin_vertex_translate(rctx); - translate = TRUE; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -226,9 +306,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_draw_common(&draw); - if (translate) - r600_end_vertex_translate(rctx); - pipe_resource_reference(&draw.index_buffer, NULL); } @@ -296,7 +373,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, unsigned eqRGB = state->rt[i].rgb_func; unsigned srcRGB = state->rt[i].rgb_src_factor; unsigned dstRGB = state->rt[i].rgb_dst_factor; - + unsigned eqA = state->rt[i].alpha_func; unsigned srcA = state->rt[i].alpha_src_factor; unsigned dstA = state->rt[i].alpha_dst_factor; @@ -475,7 +552,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), 0xFFFFFFFF, NULL); @@ -603,9 +680,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); + S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->last_level) | + S_038014_LAST_LEVEL(state->u.tex.last_level) | S_038014_BASE_ARRAY(0) | S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, @@ -622,7 +699,7 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); } } } @@ -692,16 +769,16 @@ static void r600_set_clip_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_CLIP; for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, - R_028E20_PA_CL_UCP0_X + i * 4, + R_028E20_PA_CL_UCP0_X + i * 16, fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E24_PA_CL_UCP0_Y + i * 4, + R_028E24_PA_CL_UCP0_Y + i * 16, fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E28_PA_CL_UCP0_Z + i * 4, + R_028E28_PA_CL_UCP0_Z + i * 16, fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E2C_PA_CL_UCP0_W + i * 4, + R_028E2C_PA_CL_UCP0_W + i * 16, fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, @@ -824,10 +901,11 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct r600_surface *surf; - unsigned level = state->cbufs[cb]->level; + unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; + unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -838,6 +916,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; @@ -852,12 +933,12 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) color_info |= S_0280A0_SOURCE_FORMAT(1); r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, - (state->cbufs[cb]->offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); + (offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4, color_info, 0xFFFFFFFF, bo[0]); @@ -888,11 +969,12 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta struct r600_surface *surf; unsigned level; unsigned pitch, slice, format; + unsigned offset; if (state->zsbuf == NULL) return; - level = state->zsbuf->level; + level = state->zsbuf->u.tex.level; surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; @@ -902,12 +984,15 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta rtex->depth = 1; rbuffer = &rtex->resource; + /* XXX quite sure for dx10+ hw don't need any offset hacks */ + offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, + level, state->zsbuf->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, - (state->zsbuf->offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE, S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice), 0xFFFFFFFF, NULL); @@ -933,7 +1018,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FRAMEBUFFER; util_copy_framebuffer_state(&rctx->framebuffer, state); - + rctx->pframebuffer = &rctx->framebuffer; /* build states */ @@ -1015,6 +1100,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; r600_context_pipe_state_set(&rctx->ctx, rstate); + + if (state->zsbuf) { + r600_polygon_offset_update(rctx); + } } static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 210420e823b..99b372caace 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -58,6 +58,12 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->states[rs->rstate.id] = &rs->rstate; r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_polygon_offset_update(rctx); + } else { + r600_polygon_offset_update(rctx); + } } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -113,8 +119,23 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; + /* delete previous translated vertex elements */ + if (rctx->tran.new_velems) { + r600_end_vertex_translate(rctx); + } + rctx->vertex_elements = v; if (v) { + rctx->states[v->rstate.id] = &v->rstate; + r600_context_pipe_state_set(&rctx->ctx, &v->rstate); + if (rctx->family >= CHIP_CEDAR) { + evergreen_vertex_buffer_update(rctx); + } else { + r600_vertex_buffer_update(rctx); + } + } + + if (v) { // rctx->vs_rebuild = TRUE; } } @@ -122,11 +143,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; - FREE(state); - + if (rctx->states[v->rstate.id] == &v->rstate) { + rctx->states[v->rstate.id] = NULL; + } if (rctx->vertex_elements == state) rctx->vertex_elements = NULL; + + r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + FREE(state); } @@ -166,16 +192,23 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + /* The stride of zero means we will be fetching only the first + * vertex, so don't care about max_index. */ + if (!vbo->stride) + continue; + if (vbo->max_index == ~0) { - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; } max_index = MIN2(vbo->max_index, max_index); } rctx->nvertex_buffer = count; rctx->vb_max_index = max_index; + if (rctx->family >= CHIP_CEDAR) { + evergreen_vertex_buffer_update(rctx); + } else { + r600_vertex_buffer_update(rctx); + } } @@ -186,9 +219,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - int i; enum pipe_format *format; + int i; assert(count < 32); if (!v) @@ -202,19 +236,22 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, format = &v->hw_format[i]; switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); default:; } v->incompatible_layout = v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i] || - v->elements[i].src_offset % 4 != 0; + v->elements[i].src_format != v->hw_format[i]; + + v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); + } - v->hw_format_size[i] = - align(util_format_get_blocksize(v->hw_format[i]), 4); + if (r600_vertex_elements_build_fetch_shader(rctx, v)) { + FREE(v); + return NULL; } return v; @@ -239,6 +276,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->ps_shader = (struct r600_pipe_shader *)state; + if (state) { + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); + } } void r600_bind_vs_shader(struct pipe_context *ctx, void *state) @@ -247,6 +287,9 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->vs_shader = (struct r600_pipe_shader *)state; + if (state) { + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); + } } void r600_delete_ps_shader(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 1be5b156d35..d994196e19d 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -282,6 +282,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_R8_UNORM: @@ -304,6 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e719f7fb983..d4d9b07c0e8 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -45,14 +45,10 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t { struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; struct pipe_resource *texture = transfer->resource; - struct pipe_subresource subdst; - subdst.face = 0; - subdst.level = 0; ctx->resource_copy_region(ctx, rtransfer->staging_texture, - subdst, 0, 0, 0, texture, transfer->sr, - transfer->box.x, transfer->box.y, transfer->box.z, - transfer->box.width, transfer->box.height); + 0, 0, 0, 0, texture, transfer->level, + &transfer->box); } @@ -61,34 +57,32 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 { struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; struct pipe_resource *texture = transfer->resource; - struct pipe_subresource subsrc; - - subsrc.face = 0; - subsrc.level = 0; - ctx->resource_copy_region(ctx, texture, transfer->sr, + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = transfer->box.width; + sbox.height = transfer->box.height; + /* XXX that might be wrong */ + sbox.depth = 1; + ctx->resource_copy_region(ctx, texture, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - rtransfer->staging_texture, subsrc, - 0, 0, 0, - transfer->box.width, transfer->box.height); + rtransfer->staging_texture, + 0, &sbox); ctx->flush(ctx, 0, NULL); } -static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, - unsigned level, unsigned zslice, - unsigned face) +unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, + unsigned level, unsigned layer) { unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * rtex->layer_size[level]; case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * rtex->layer_size[level]; + return offset + layer * rtex->layer_size[level]; default: - assert(zslice == 0 && face == 0); + assert(layer == 0); return offset; } } @@ -109,11 +103,11 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, case V_038000_ARRAY_2D_TILED_THIN1: p_align = MAX2(rscreen->tiling_info->num_banks, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * - rscreen->tiling_info->num_banks)); + rscreen->tiling_info->num_banks)) * 8; break; - case 0: + case V_038000_ARRAY_LINEAR_GENERAL: default: - p_align = 64; + p_align = rscreen->tiling_info->group_bytes / pixsize; break; } return p_align; @@ -139,6 +133,29 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, return h_align; } +static unsigned r600_get_base_alignment(struct pipe_screen *screen, + enum pipe_format format, + unsigned array_mode) +{ + struct r600_screen* rscreen = (struct r600_screen *)screen; + unsigned pixsize = util_format_get_blocksize(format); + int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int h_align = r600_get_height_alignment(screen, array_mode); + int b_align; + + switch (array_mode) { + case V_038000_ARRAY_2D_TILED_THIN1: + b_align = MAX2(rscreen->tiling_info->num_banks * rscreen->tiling_info->num_channels * 8 * 8 * pixsize, + p_align * pixsize * h_align); + break; + case V_038000_ARRAY_1D_TILED_THIN1: + default: + b_align = rscreen->tiling_info->group_bytes; + break; + } + return b_align; +} + static unsigned mip_minify(unsigned size, unsigned level) { unsigned val; @@ -153,10 +170,8 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, unsigned level) { struct pipe_resource *ptex = &rtex->resource.base.b; - struct radeon *radeon = (struct radeon *)screen->winsys; - enum chip_class chipc = r600_get_family_class(radeon); unsigned width, stride, tile_width; - + if (rtex->pitch_override) return rtex->pitch_override; @@ -167,11 +182,6 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, width = align(width, tile_width); } stride = util_format_get_stride(ptex->format, width); - if (chipc == EVERGREEN) - stride = align(stride, 512); - - if (ptex->bind & PIPE_BIND_RENDER_TARGET) - stride = align(stride, 512); return stride; } @@ -193,11 +203,10 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, } /* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, - unsigned pitch_in_bytes) +static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) { - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); } static void r600_texture_set_array_mode(struct pipe_screen *screen, @@ -257,6 +266,9 @@ static void r600_setup_miptree(struct pipe_screen *screen, } else size = layer_size * u_minify(ptex->depth0, i); + /* align base image and start of miptree */ + if ((i == 0) || (i == 1)) + offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_bytes[i] = pitch; @@ -297,7 +309,10 @@ r600_texture_create_object(struct pipe_screen *screen, resource->size = rtex->size; if (!resource->bo) { - resource->bo = r600_bo(radeon, rtex->size, 4096, base->bind, base->usage); + struct pipe_resource *ptex = &rtex->resource.base.b; + int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + + resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); if (!resource->bo) { FREE(rtex); return NULL; @@ -310,12 +325,12 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { unsigned array_mode = 0; - static int force_tiling = -1; + static int force_tiling = -1; - /* Would like some magic "get_bool_option_once" routine. + /* Would like some magic "get_bool_option_once" routine. */ if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); if (force_tiling) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && @@ -346,8 +361,8 @@ static void r600_texture_destroy(struct pipe_screen *screen, } static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) + struct pipe_resource *ptex, + struct winsys_handle *whandle) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; @@ -357,36 +372,39 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, rtex->pitch_in_bytes[0], whandle); } -static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, +static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, - unsigned face, unsigned level, - unsigned zslice, unsigned flags) + const struct pipe_surface *surf_tmpl) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned offset, tile_height; + unsigned tile_height; + unsigned level = surf_tmpl->u.tex.level; + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); if (surface == NULL) return NULL; - offset = r600_texture_get_offset(rtex, level, zslice, face); + /* XXX no offset */ +/* offset = r600_texture_get_offset(rtex, level, surf_tmpl->u.tex.first_layer);*/ pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); - surface->base.format = texture->format; + surface->base.context = pipe; + surface->base.format = surf_tmpl->format; surface->base.width = mip_minify(texture->width0, level); surface->base.height = mip_minify(texture->height0, level); - surface->base.offset = offset; - surface->base.usage = flags; - surface->base.zslice = zslice; + surface->base.usage = surf_tmpl->usage; surface->base.texture = texture; - surface->base.face = face; - surface->base.level = level; + surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); surface->aligned_height = align(surface->base.height, tile_height); return &surface->base; } -static void r600_tex_surface_destroy(struct pipe_surface *surface) +static void r600_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surface) { pipe_resource_reference(&surface->texture, NULL); FREE(surface); @@ -419,7 +437,7 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, static unsigned int r600_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, - unsigned face, unsigned level) + unsigned level, int layer) { /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; @@ -466,7 +484,7 @@ out: */ static INLINE unsigned u_box_volume( const struct pipe_box *box ) { - return box->width * box->depth * box->height; + return box->width * box->depth * box->height; }; @@ -474,44 +492,44 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box ) * If so, don't use a staging resource. */ static boolean permit_hardware_blit(struct pipe_screen *screen, - struct pipe_resource *res) + struct pipe_resource *res) { - unsigned bind; + unsigned bind; - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ + * with depth resource copies at the moment so avoid them for + * now. + */ if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - return TRUE; + UTIL_FORMAT_COLORSPACE_ZS, + 0) != 0) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind, 0)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW, 0)) + return FALSE; + + return TRUE; } struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -531,37 +549,36 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (rtex->tiled) use_staging_texture = TRUE; - if ((usage & PIPE_TRANSFER_READ) && - u_box_volume(box) > 1024) - use_staging_texture = TRUE; - - /* XXX: Use a staging texture for uploads if the underlying BO - * is busy. No interface for checking that currently? so do - * it eagerly whenever the transfer doesn't require a readback - * and might block. - */ - if ((usage & PIPE_TRANSFER_WRITE) && - !(usage & (PIPE_TRANSFER_READ | - PIPE_TRANSFER_DONTBLOCK | - PIPE_TRANSFER_UNSYNCHRONIZED))) - use_staging_texture = TRUE; - - if (!permit_hardware_blit(ctx->screen, texture) || - (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) - use_staging_texture = FALSE; + if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) + use_staging_texture = TRUE; + + /* XXX: Use a staging texture for uploads if the underlying BO + * is busy. No interface for checking that currently? so do + * it eagerly whenever the transfer doesn't require a readback + * and might block. + */ + if ((usage & PIPE_TRANSFER_WRITE) && + !(usage & (PIPE_TRANSFER_READ | + PIPE_TRANSFER_DONTBLOCK | + PIPE_TRANSFER_UNSYNCHRONIZED))) + use_staging_texture = TRUE; + + if (!permit_hardware_blit(ctx->screen, texture) || + (texture->flags & R600_RESOURCE_FLAG_TRANSFER)) + use_staging_texture = FALSE; trans = CALLOC_STRUCT(r600_transfer); if (trans == NULL) return NULL; pipe_resource_reference(&trans->transfer.resource, texture); - trans->transfer.sr = sr; + trans->transfer.level = level; trans->transfer.usage = usage; trans->transfer.box = *box; if (rtex->depth) { - /* XXX: only readback the rectangle which is being mapped? - */ - /* XXX: when discard is true, no need to read back from depth texture - */ + /* XXX: only readback the rectangle which is being mapped? + */ + /* XXX: when discard is true, no need to read back from depth texture + */ r = r600_texture_depth_flush(ctx, texture); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); @@ -575,6 +592,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.width0 = box->width; resource.height0 = box->height; resource.depth0 = 1; + resource.array_size = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_STAGING; @@ -600,7 +618,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, } trans->transfer.stride = - ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0]; + ((struct r600_resource_texture *)trans->staging_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ @@ -608,8 +626,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, } return &trans->transfer; } - trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } @@ -722,10 +740,10 @@ struct u_resource_vtbl r600_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -void r600_init_screen_texture_functions(struct pipe_screen *screen) +void r600_init_surface_functions(struct r600_pipe_context *r600) { - screen->get_tex_surface = r600_get_tex_surface; - screen->tex_surface_destroy = r600_tex_surface_destroy; + r600->context.create_surface = r600_create_surface; + r600->context.surface_destroy = r600_surface_destroy; } static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, @@ -826,8 +844,8 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_YUV: yuv_format |= (1 << 30); switch (format) { - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_YUYV: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: default: break; } @@ -845,29 +863,29 @@ uint32_t r600_translate_texformat(enum pipe_format format, /* S3TC formats. TODO */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + static int r600_enable_s3tc = -1; - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (r600_enable_s3tc == -1) + r600_enable_s3tc = + debug_get_bool_option("R600_ENABLE_S3TC", FALSE); - if (!r600_enable_s3tc) - goto out_unknown; + if (!r600_enable_s3tc) + goto out_unknown; switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: - result = FMT_BC1; - goto out_word4; + result = FMT_BC1; + goto out_word4; case PIPE_FORMAT_DXT3_RGBA: - result = FMT_BC2; - goto out_word4; + result = FMT_BC2; + goto out_word4; case PIPE_FORMAT_DXT5_RGBA: - result = FMT_BC3; - goto out_word4; - default: - goto out_unknown; - } + result = FMT_BC3; + goto out_word4; + default: + goto out_unknown; + } } diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 9a07cf2073f..ba12eee2b56 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -41,6 +41,8 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; struct pipe_resource *out_buffer; unsigned i, num_verts; + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + void *tmp; /* Initialize the translate key, i.e. the recipe how vertices should be * translated. */ @@ -51,9 +53,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) unsigned output_format_size = ve->hw_format_size[i]; /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { + if (ve->elements[i].src_format == ve->hw_format[i]) { continue; } @@ -147,29 +147,23 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) } /* Save and replace vertex elements. */ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - rctx->tran.saved_velems = rctx->vertex_elements; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); } - - rctx->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); } + tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, tmp); + rctx->tran.new_velems = tmp; + pipe_resource_reference(&out_buffer, NULL); } @@ -177,13 +171,15 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) { struct pipe_context *pipe = &rctx->context; + if (rctx->tran.new_velems == NULL) { + return; + } /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + rctx->tran.new_velems = NULL; /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, - NULL); + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL); } void r600_translate_index_buffer(struct r600_pipe_context *r600, @@ -197,14 +193,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, *index_size = 2; *start = 0; break; - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - case 4: break; } diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c new file mode 100644 index 00000000000..44102ff55b6 --- /dev/null +++ b/src/gallium/drivers/r600/r600_upload.c @@ -0,0 +1,114 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse <[email protected]> + */ +#include <errno.h> +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "r600.h" +#include "r600_pipe.h" +#include "r600_resource.h" + +struct r600_upload { + struct r600_pipe_context *rctx; + struct r600_bo *buffer; + char *ptr; + unsigned size; + unsigned default_size; + unsigned total_alloc_size; + unsigned offset; + unsigned alignment; +}; + +struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, + unsigned default_size, + unsigned alignment) +{ + struct r600_upload *upload = CALLOC_STRUCT(r600_upload); + + if (upload == NULL) + return NULL; + + upload->rctx = rctx; + upload->size = 0; + upload->default_size = default_size; + upload->alignment = alignment; + upload->ptr = NULL; + upload->buffer = NULL; + upload->total_alloc_size = 0; + + return upload; +} + +void r600_upload_flush(struct r600_upload *upload) +{ + if (upload->buffer) { + r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); + } + upload->default_size = MAX2(upload->total_alloc_size, upload->default_size); + upload->total_alloc_size = 0; + upload->size = 0; + upload->offset = 0; + upload->ptr = NULL; + upload->buffer = NULL; +} + +void r600_upload_destroy(struct r600_upload *upload) +{ + r600_upload_flush(upload); + FREE(upload); +} + +int r600_upload_buffer(struct r600_upload *upload, unsigned offset, + unsigned size, struct r600_resource_buffer *in_buffer, + unsigned *out_offset, unsigned *out_size, + struct r600_bo **out_buffer) +{ + unsigned alloc_size = align(size, upload->alignment); + const void *in_ptr = NULL; + + if (upload->offset + alloc_size > upload->size) { + if (upload->size) { + r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); + } + upload->size = align(MAX2(upload->default_size, alloc_size), 4096); + upload->total_alloc_size += upload->size; + upload->offset = 0; + upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0); + if (upload->buffer == NULL) { + return -ENOMEM; + } + upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL); + } + + in_ptr = in_buffer->user_buffer; + memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size); + *out_offset = upload->offset; + *out_size = upload->size; + *out_buffer = NULL; + r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer); + upload->offset += alloc_size; + + return 0; +} |