diff options
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 71 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 154 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_resource.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 4 |
14 files changed, 294 insertions, 42 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 1bb4c6b2afb..1881e633d54 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -39,15 +39,15 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): assert(!end_of_program); bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | - S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) | - S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | + S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) | + S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c6f3669c9a3..94eef77945b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -842,6 +842,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = (struct r600_resource*)buffer; + uint32_t offset; /* Note that the state tracker can unbind constant buffers by * passing NULL here. @@ -850,6 +851,8 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, return; } + r600_upload_const_buffer(rctx, buffer, &offset); + switch (shader) { case PIPE_SHADER_VERTEX: rctx->vs_const_buffer.nregs = 0; @@ -859,7 +862,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -870,7 +873,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: @@ -1067,12 +1070,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } tmp = 0x00000000; switch (family) { case CHIP_CEDAR: case CHIP_PALM: + case CHIP_CAICOS: break; default: tmp |= S_008C00_VC_ENABLE(1); diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 85e29665053..335f282b06f 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -35,7 +35,7 @@ #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) #define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) typedef uint64_t u64; typedef uint32_t u32; @@ -92,6 +92,9 @@ enum radeon_family { CHIP_CYPRESS, CHIP_HEMLOCK, CHIP_PALM, + CHIP_BARTS, + CHIP_TURKS, + CHIP_CAICOS, CHIP_LAST, }; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index bee1c941e5d..b15758adc33 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -161,6 +161,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; default: @@ -473,9 +476,20 @@ static int is_cfile(unsigned sel) return (sel > 255 && sel < 512); } +/* CB constants start at 512, and get translated to a kcache index when ALU + * clauses are constructed. Note that we handle kcache constants the same way + * as (the now gone) cfile constants, is that really required? */ +static int is_cb_const(int sel) +{ + if (sel > 511 && sel < 4607) + return 1; + return 0; +} + static int is_const(int sel) { return is_cfile(sel) || + is_cb_const(sel) || (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL); } @@ -837,6 +851,120 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s return 0; } +/* This code handles kcache lines as single blocks of 32 constants. We could + * probably do slightly better by recognizing that we actually have two + * consecutive lines of 16 constants, but the resulting code would also be + * somewhat more complicated. */ +static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) +{ + struct r600_bc_kcache *kcache = bc->cf_last->kcache; + unsigned int required_lines; + unsigned int free_lines = 0; + unsigned int cache_line[3]; + unsigned int count = 0; + unsigned int i, j; + int r; + + /* Collect required cache lines. */ + for (i = 0; i < 3; ++i) { + bool found = false; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + line = ((alu->src[i].sel - 512) / 32) * 2; + + for (j = 0; j < count; ++j) { + if (cache_line[j] == line) { + found = true; + break; + } + } + + if (!found) + cache_line[count++] = line; + } + + /* This should never actually happen. */ + if (count >= 3) return -ENOMEM; + + for (i = 0; i < 2; ++i) { + if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) { + ++free_lines; + } + } + + /* Filter lines pulled in by previous intructions. Note that this is + * only for the required_lines count, we can't remove these from the + * cache_line array since we may have to start a new ALU clause. */ + for (i = 0, required_lines = count; i < count; ++i) { + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + --required_lines; + break; + } + } + } + + /* Start a new ALU clause if needed. */ + if (required_lines > free_lines) { + if ((r = r600_bc_add_cf(bc))) { + return r; + } + bc->cf_last->inst = (type << 3); + kcache = bc->cf_last->kcache; + } + + /* Setup the kcache lines. */ + for (i = 0; i < count; ++i) { + bool found = false; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + found = true; + break; + } + } + + if (found) continue; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) { + kcache[j].bank = 0; + kcache[j].addr = cache_line[i]; + kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2; + break; + } + } + } + + /* Alter the src operands to refer to the kcache. */ + for (i = 0; i < 3; ++i) { + static const unsigned int base[] = {128, 160, 256, 288}; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + alu->src[i].sel -= 512; + line = (alu->src[i].sel / 32) * 2; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == line) { + alu->src[i].sel &= 0x1f; + alu->src[i].sel += base[j]; + break; + } + } + } + + return 0; +} + int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); @@ -870,12 +998,20 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } } bc->cf_last->inst = (type << 3); + + /* Setup the kcache for this ALU instruction. This will start a new + * ALU clause if needed. */ + if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + free(nalu); + return r; + } + if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; } /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) * worst case */ - if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { + if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) { bc->force_add_cf = 1; } /* replace special constants */ @@ -890,10 +1026,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->cf_last->ndw += 2; bc->ndw += 2; - bc->cf_last->kcache0_mode = 2; - /* process cur ALU instructions for bank swizzle */ - if (alu->last) { + if (nalu->last) { struct r600_bc_alu *slots[5]; r = assign_alu_units(bc->cf_last->curr_bs_head, slots); if (r) @@ -1182,14 +1316,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case CF_CLASS_ALU: assert(!end_of_program); bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) | S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 6059e45737f..519245f3af2 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -117,6 +117,12 @@ struct r600_bc_output { unsigned burst_count; }; +struct r600_bc_kcache { + unsigned bank; + unsigned mode; + unsigned addr; +}; + struct r600_bc_cf { struct list_head list; unsigned inst; @@ -127,12 +133,7 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned barrier; - unsigned kcache0_mode; - unsigned kcache1_mode; - unsigned kcache0_addr; - unsigned kcache1_addr; - unsigned kcache0_bank; - unsigned kcache1_bank; + struct r600_bc_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 0f04136fb2a..b9ec9592e35 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -78,7 +78,7 @@ static void r600_blitter_end(struct pipe_context *ctx) r600_context_queries_resume(&rctx->ctx); } -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_surface *zsurf, *cbsurf, surf_tmpl; @@ -107,9 +107,6 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); - - - return 0; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index a17c54d6eeb..469c8195fe9 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -250,3 +250,29 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx) } return ret; } + + +int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, + uint32_t *const_offset) +{ + if (r600_buffer_is_user_buffer(cbuffer)) { + struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer); + unsigned upload_offset; + int ret = 0; + + ret = r600_upload_buffer(rctx->rupload_const, + 0, cbuffer->width0, + rbuffer, + &upload_offset, + &rbuffer->r.bo_size, + &rbuffer->r.bo); + if (ret) + return ret; + rbuffer->uploaded = TRUE; + *const_offset = upload_offset; + return 0; + } + + *const_offset = 0; + return 0; +} diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 69cb5f7751f..ad14dbe14f4 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -70,6 +70,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, r600_context_flush(&rctx->ctx); r600_upload_flush(rctx->rupload_vb); + r600_upload_flush(rctx->rupload_const); } static void r600_destroy_context(struct pipe_context *context) @@ -89,6 +90,7 @@ static void r600_destroy_context(struct pipe_context *context) } r600_upload_destroy(rctx->rupload_vb); + r600_upload_destroy(rctx->rupload_const); if (rctx->tran.translate_cache) translate_cache_destroy(rctx->tran.translate_cache); @@ -149,6 +151,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: rctx->context.draw_vbo = evergreen_draw; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { @@ -169,6 +174,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256); + if (rctx->rupload_const == NULL) { + r600_destroy_context(&rctx->context); + return NULL; + } + rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { FREE(rctx); @@ -199,8 +210,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void else rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; - return &rctx->context; } @@ -233,6 +242,9 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; case CHIP_PALM: return "AMD PALM"; + case CHIP_BARTS: return "AMD BARTS"; + case CHIP_TURKS: return "AMD TURKS"; + case CHIP_CAICOS: return "AMD CAICOS"; default: return "AMD unknown"; } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d9c35a44f18..2112a40f696 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -169,6 +169,7 @@ struct r600_pipe_context { struct r600_textures_info ps_samplers; unsigned vb_max_index; struct r600_translate_context tran; + struct r600_upload *rupload_const; }; struct r600_drawl { @@ -196,7 +197,7 @@ void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 8ca27699206..28b3e1e5e40 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -106,7 +106,6 @@ static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) } int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); -int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, @@ -132,4 +131,5 @@ int r600_upload_buffer(struct r600_upload *upload, unsigned offset, unsigned *out_offset, unsigned *out_size, struct r600_bo **out_buffer); +int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d78e249ae95..95367d7c536 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -507,7 +507,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 * Values [160,191] correspond to constant buffer bank 1 - * Values [256,511] correspond to cfile constants c[0..255]. + * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) + * Values [256,287] correspond to constant buffer bank 2 (EG) + * Values [288,319] correspond to constant buffer bank 3 (EG) * Other special values are shown in the list below. * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) @@ -541,7 +543,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; + /* Outside the GPR range. This will be translated to one of the + * kcache banks later. */ + ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + @@ -587,6 +591,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; break; + case TGSI_TOKEN_TYPE_PROPERTY: + break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); r = -EINVAL; diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 2401d47e2a2..56ed35e8b32 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -74,6 +74,10 @@ #define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30) #define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) #define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define V_SQ_CF_KCACHE_NOP 0x00000000 +#define V_SQ_CF_KCACHE_LOCK_1 0x00000001 +#define V_SQ_CF_KCACHE_LOCK_2 0x00000002 +#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003 #define P_SQ_CF_ALU_WORD1 #define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) #define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8b001e11f4a..9572ff9a1a2 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1120,6 +1120,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = (struct r600_resource*)buffer; + uint32_t offset; /* Note that the state tracker can unbind constant buffers by * passing NULL here. @@ -1128,6 +1129,8 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint return; } + r600_upload_const_buffer(rctx, buffer, &offset); + switch (shader) { case PIPE_SHADER_VERTEX: rctx->vs_const_buffer.nregs = 0; @@ -1137,7 +1140,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -1148,7 +1151,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 23e239c0068..6add92e6d4c 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -443,8 +443,6 @@ static unsigned int r600_texture_is_referenced(struct pipe_context *context, return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } -int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); - int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture) { @@ -476,7 +474,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, out: /* XXX: only do this if the depth texture has actually changed: */ - r600_blit_uncompress_depth_ptr(ctx, rtex); + r600_blit_uncompress_depth(ctx, rtex); return 0; } |