diff options
author | Christian König <[email protected]> | 2010-11-21 19:40:52 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2010-11-21 19:40:52 +0100 |
commit | e6b71530daea3059ee362d4df51575e27e026b22 (patch) | |
tree | 8df01693aa4bf44360647d79a340cbb4156aa91a /src/gallium/drivers/r600 | |
parent | 42c7291d2cb50c2bd94dd9346a8402a24303d66d (diff) | |
parent | 5e3733fadf08178fca7c9f20a0f4783f940383aa (diff) |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
src/gallium/auxiliary/Makefile
src/gallium/auxiliary/SConscript
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_state_inlines.h | 41 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 31 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 74 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_opcodes.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 146 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 45 |
16 files changed, 256 insertions, 163 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index c30f09c394b..21d66fa9564 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -74,6 +74,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 59641976403..698299ec134 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -311,6 +311,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_SWAP_STD; + case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -400,6 +401,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return V_028C70_COLOR_16; + case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; @@ -447,8 +449,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16_16; /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: return V_028C70_COLOR_16_16_16_16; @@ -460,6 +464,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_FLOAT: return V_028C70_COLOR_32_32_FLOAT; + case PIPE_FORMAT_R32G32_USCALED: case PIPE_FORMAT_R32G32_SSCALED: return V_028C70_COLOR_32_32; @@ -634,38 +639,4 @@ out_unknown: return ~0; } -static INLINE uint32_t r600_translate_vertex_data_swizzle(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - unsigned i; - uint32_t word3; - - assert(format); - - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - fprintf(stderr, "r600: Bad format %s in %s:%d\n", - util_format_short_name(format), __FUNCTION__, __LINE__); - return 0; - } - - word3 = 0; - for (i = 0; i < desc->nr_channels; i++) { - switch (i) { - case 0: - word3 |= S_03000C_DST_SEL_X(desc->swizzle[0]); - break; - case 1: - word3 |= S_03000C_DST_SEL_Y(desc->swizzle[1]); - break; - case 2: - word3 |= S_03000C_DST_SEL_Z(desc->swizzle[2]); - break; - case 3: - word3 |= S_03000C_DST_SEL_W(desc->swizzle[3]); - break; - } - } - return word3; -} - #endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4725b5e75e2..26dad7b65c0 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -323,11 +323,11 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); /* FIXME LOD it depends on texture base level ... */ r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)), + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)) | + S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -431,7 +431,7 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); } } } @@ -501,16 +501,16 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_CLIP; for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, - R_0285BC_PA_CL_UCP0_X + i * 4, + R_0285BC_PA_CL_UCP0_X + i * 16, fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C0_PA_CL_UCP0_Y + i * 4, + R_0285C0_PA_CL_UCP0_Y + i * 16, fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C4_PA_CL_UCP0_Z + i * 4, + R_0285C4_PA_CL_UCP0_Z + i * 16, fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_0285C8_PA_CL_UCP0_W + i * 4, + R_0285C8_PA_CL_UCP0_W + i * 16, fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, @@ -660,7 +660,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) color_info |= S_028C70_SOURCE_FORMAT(1); /* FIXME handle enabling of CB beyond BASE8 which has different offset */ @@ -1276,7 +1276,10 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) word2 = format | S_030008_STRIDE(vertex_buffer->stride); - word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->hw_format[i]); + word3 = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); @@ -1286,7 +1289,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_vs_resource_set(&rctx->ctx, rstate, i); + evergreen_fs_resource_set(&rctx->ctx, rstate, i); } mask = 0; @@ -1464,8 +1467,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader S_0286E0_PERSP_CENTROID_ENA(have_centroid); if (have_linear) spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(have_centroid); - + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, @@ -1551,7 +1554,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); + (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 17858b2d381..a617a5b8631 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -264,6 +264,7 @@ void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); @@ -284,9 +285,11 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 8a7f3ce575c..ba1471eb785 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -23,6 +23,7 @@ #include <stdio.h> #include <errno.h> #include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" #include "r600_sq.h" #include "r600_opcodes.h" @@ -55,8 +56,8 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: - return 2; - + return 2; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: @@ -74,7 +75,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) default: R600_ERR( "Need instruction operand number for 0x%x.\n", alu->inst); }; - + return 3; } @@ -137,20 +138,20 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_RV635: case CHIP_RS780: case CHIP_RS880: - bc->chiprev = 0; + bc->chiprev = CHIPREV_R600; break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - bc->chiprev = 1; + bc->chiprev = CHIPREV_R700; break; case CHIP_CEDAR: case CHIP_REDWOOD: case CHIP_JUNIPER: case CHIP_CYPRESS: case CHIP_HEMLOCK: - bc->chiprev = 2; + bc->chiprev = CHIPREV_EVERGREEN; break; default: R600_ERR("unknown family %d\n", bc->family); @@ -199,9 +200,9 @@ const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 SQ_ALU_VEC_012}; //111 const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //001 SQ_ALU_SCL_122, //010 - + SQ_ALU_SCL_221, //011 SQ_ALU_SCL_212, //100 SQ_ALU_SCL_122, //101 @@ -592,10 +593,34 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | - S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | - S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + unsigned fetch_resource_start = 0; + + /* check if we are fetch shader */ + /* fetch shader can also access vertex resource, + * first fetch shader resource is at 160 + */ + if (bc->type == -1) { + switch (bc->chiprev) { + /* r600 */ + case CHIPREV_R600: + /* r700 */ + case CHIPREV_R700: + fetch_resource_start = 160; + break; + /* evergreen */ + case CHIPREV_EVERGREEN: + fetch_resource_start = 0; + break; + default: + fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", + __FILE__, __func__, __LINE__, bc->chiprev); + break; + } + } + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | @@ -678,8 +703,8 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | - S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | - S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); + S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | + S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } if (alu->last) { if (alu->nliteral && !alu->literal_added) { @@ -710,7 +735,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: @@ -742,6 +767,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | @@ -766,7 +793,10 @@ int r600_bc_build(struct r600_bc *bc) int r; if (bc->callstack[0].max > 0) - bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; + bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; + if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { + bc->nstack = 1; + } /* first path compute addr of each CF block */ /* addr start after all the CF instructions */ @@ -795,6 +825,8 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -810,7 +842,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev == 2) + if (bc->chiprev == CHIPREV_EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -821,11 +853,11 @@ int r600_bc_build(struct r600_bc *bc) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { switch(bc->chiprev) { - case 0: + case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); break; - case 1: - case 2: /* eg alu is same encoding as r700 */ + case CHIPREV_R700: + case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -868,6 +900,8 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 25cda16837d..f2016af3e72 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -145,12 +145,12 @@ struct r600_bc_cf { struct r600_bc_alu *curr_bs_head; }; -#define FC_NONE 0 -#define FC_IF 1 -#define FC_LOOP 2 -#define FC_REP 3 -#define FC_PUSH_VPM 4 -#define FC_PUSH_WQM 5 +#define FC_NONE 0 +#define FC_IF 1 +#define FC_LOOP 2 +#define FC_REP 3 +#define FC_PUSH_VPM 4 +#define FC_PUSH_WQM 5 struct r600_cf_stack_entry { int type; @@ -166,10 +166,11 @@ struct r600_cf_callstack { int current; int max; }; - + struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ + int type; struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 50d47060c1a..74cf9687999 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -27,9 +27,9 @@ enum r600_blitter_op /* bitmask */ { - R600_CLEAR = 1, - R600_CLEAR_SURFACE = 2, - R600_COPY = 4 + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 }; static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) @@ -189,7 +189,6 @@ static void r600_resource_copy_region(struct pipe_context *ctx, else r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height); - } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index ed97b6e69a3..a432271b82d 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -124,7 +124,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if ((transfer->box.x >= rbuffer->ranges[i].start) && (transfer->box.x < rbuffer->ranges[i].end)) flush = TRUE; - + if (flush) { r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); rbuffer->num_ranges = 0; @@ -180,7 +180,7 @@ static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, return; } } - + rbuffer->ranges[rbuffer->num_ranges].start = offset; rbuffer->ranges[rbuffer->num_ranges].end = offset+length; rbuffer->num_ranges++; diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 4f9b39a7fdc..2ee0c83e5d3 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -385,8 +385,13 @@ #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B #define EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS 0x0000005C -#define BC_INST(bc, x) ((bc)->chiprev == 2 ? EG_##x : x) -#define CTX_INST(x) (ctx->bc->chiprev == 2 ? EG_##x : x) +#define CHIPREV_R600 0 +#define CHIPREV_R700 1 +#define CHIPREV_EVERGREEN 2 + +#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) + +#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 8a62d646d3a..3bfba99dcaf 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -187,7 +187,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void FREE(rctx); return NULL; } - + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); @@ -371,6 +371,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; default: return 0; } @@ -402,7 +407,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) && - r600_is_colorbuffer_format_supported(format)) { + r600_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 1c691f6b764..ba9fedf0b6c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -93,6 +93,7 @@ struct r600_pipe_shader { struct r600_shader shader; struct r600_pipe_state rstate; struct r600_bo *bo; + struct r600_bo *bo_fetch; struct r600_vertex_element vertex_elements; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f6153c0e80f..3e42309bde0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -80,7 +80,7 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch); r600_pipe_state_add_reg(rstate, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -217,6 +217,15 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s void *ptr; /* copy new shader */ + if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) { + shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0); + if (shader->bo_fetch == NULL) { + return -ENOMEM; + } + ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL); + memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4); + r600_bo_unmap(rctx->radeon, shader->bo_fetch); + } if (shader->bo == NULL) { shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); if (shader->bo == NULL) { @@ -257,7 +266,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader const struct util_format_description *desc; enum pipe_format resource_format[160]; unsigned i, nresources = 0; - struct r600_bc *bc = &shader->bc; + struct r600_bc *bc = &shader->bc_fetch; struct r600_bc_cf *cf; struct r600_bc_vtx *vtx; @@ -272,7 +281,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader for (i = 0; i < rctx->vertex_elements->count; i++) { resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; } - r600_bo_reference(rctx->radeon, &rshader->bo, NULL); + r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -293,7 +302,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader break; } } - return r600_bc_build(&shader->bc); + return r600_bc_build(&shader->bc_fetch); } int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -334,6 +343,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("building bytecode failed !\n"); return r; } + if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) { + r = r600_bc_build(&shader->shader.bc_fetch); + if (r) { + R600_ERR("building bytecode failed !\n"); + return r; + } + } //fprintf(stderr, "______________________________________________________________\n"); return 0; } @@ -364,6 +380,7 @@ struct r600_shader_ctx { unsigned temp_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; + struct r600_bc *bc_fetch; struct r600_shader *shader; u32 value[4]; u32 *literals; @@ -444,7 +461,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) if (ctx->shader->input[input].centroid) ij_index++; } - + /* work out gpr and base_chan from index */ gpr = ij_index / 2; base_chan = (2 * (ij_index % 2)) + 1; @@ -477,9 +494,9 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) return r; } return 0; -} - - +} + + static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; @@ -511,11 +528,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) vtx.dst_sel_z = 2; vtx.dst_sel_w = 3; vtx.use_const_fields = 1; - r = r600_bc_add_vtx(ctx->bc, &vtx); + r = r600_bc_add_vtx(ctx->bc_fetch, &vtx); if (r) return r; } - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -549,7 +566,7 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } -/* +/* * for evergreen we need to scan the shader to find the number of GPRs we need to * reserve for interpolation. * @@ -606,6 +623,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s int i, r = 0, pos0; ctx.bc = &shader->bc; + ctx.bc_fetch = &shader->bc_fetch; ctx.shader = shader; r = r600_bc_init(ctx.bc, shader->family); if (r) @@ -615,6 +633,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.parse.FullHeader.Processor.Processor; shader->processor_type = ctx.type; + if (shader->processor_type == TGSI_PROCESSOR_VERTEX) { + r = r600_bc_init(ctx.bc_fetch, shader->family); + if (r) + return r; + ctx.bc_fetch->type = -1; + } + ctx.bc->type = shader->processor_type; /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. @@ -640,8 +665,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; + if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { + r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + } else { + r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); + } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -687,7 +717,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s /* reserve first tmp for everyone */ r600_get_temp(&ctx); opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if (ctx.bc->chiprev == 2) + if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -809,6 +839,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); } } + /* add return to fetch shader */ + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { + r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + } else { + r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + } + } /* add output to bytecode */ for (i = 0; i < noutput; i++) { r = r600_bc_add_output(ctx.bc, &output[i]); @@ -1001,7 +1039,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - + alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -1046,7 +1084,7 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) return tgsi_op2_s(ctx, 1); } -/* +/* * r600 - trunc to -PI..PI range * r700 - normalize by dividing by 2PI * see fdo bug 27901 @@ -1058,7 +1096,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, int r; uint32_t lit_vals[4]; struct r600_bc_alu alu; - + memset(lit_vals, 0, 4*4); r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1084,7 +1122,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[0] = r600_src[0]; alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; @@ -1099,7 +1137,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - + alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1111,7 +1149,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == 0) { + if (ctx->bc->chiprev == CHIPREV_R600) { lit_vals[0] = fui(3.1415926535897f * 2.0f); lit_vals[1] = fui(-3.1415926535897f); } else { @@ -1129,7 +1167,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; @@ -1908,10 +1946,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x - * muladd has no writemask, have to use another temp + * muladd has no writemask, have to use another temp */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); @@ -1921,7 +1959,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].chan = 0; alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 2; - + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; @@ -1941,7 +1979,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].chan = 1; alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 2; - + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; @@ -1980,7 +2018,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } src_gpr = ctx->temp_reg; } - + opcode = ctx->inst_info->r600_opcode; if (opcode == SQ_TEX_INST_SAMPLE && (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) @@ -1990,8 +2028,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.resource_id = tex.sampler_id; - if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) - tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -2026,7 +2062,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* add shadow ambient support - gallium doesn't do it yet */ return 0; - } static int tgsi_lrp(struct r600_shader_ctx *ctx) @@ -2156,7 +2191,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - } + } if (use_temp) return tgsi_helper_copy(ctx, inst); return 0; @@ -2342,7 +2377,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; } - + /* result.y = tmp - floor(tmp); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2621,16 +2656,25 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } -/* r6/7 only for now */ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + break; + case TGSI_OPCODE_ARR: + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + break; + default: + assert(0); + return -1; + } + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) return r; @@ -2663,18 +2707,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) int r; memset(&alu, 0, sizeof(struct r600_bc_alu)); - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; - break; - case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; - break; - default: - assert(0); - return -1; - } - + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + break; + case TGSI_OPCODE_ARR: + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + break; + default: + assert(0); + return -1; + } + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) @@ -2703,8 +2747,8 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - - if (i == 0 || i == 3) { + + if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); @@ -2750,7 +2794,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; - + alu.last = 1; r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); @@ -2804,7 +2848,7 @@ static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned r ctx->bc->callstack[ctx->bc->call_sp].current + diff; } return; - } + } switch (reason) { case FC_PUSH_VPM: ctx->bc->callstack[ctx->bc->call_sp].current++; @@ -2878,7 +2922,7 @@ static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_v static void emit_testflag(struct r600_shader_ctx *ctx) { - + } static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) @@ -3239,7 +3283,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index f8bc5951395..cd108da4915 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -46,6 +46,7 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; + struct r600_bc bc_fetch; }; int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index fa011612aeb..40874152274 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -109,7 +109,7 @@ static void r600_draw_common(struct r600_drawl *draw) r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, i); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } mask = 0; @@ -296,7 +296,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, unsigned eqRGB = state->rt[i].rgb_func; unsigned srcRGB = state->rt[i].rgb_src_factor; unsigned dstRGB = state->rt[i].rgb_dst_factor; - + unsigned eqA = state->rt[i].alpha_func; unsigned srcA = state->rt[i].alpha_src_factor; unsigned dstA = state->rt[i].alpha_dst_factor; @@ -475,7 +475,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), 0xFFFFFFFF, NULL); @@ -622,7 +622,7 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); } } } @@ -692,16 +692,16 @@ static void r600_set_clip_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_CLIP; for (int i = 0; i < state->nr; i++) { r600_pipe_state_add_reg(rstate, - R_028E20_PA_CL_UCP0_X + i * 4, + R_028E20_PA_CL_UCP0_X + i * 16, fui(state->ucp[i][0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E24_PA_CL_UCP0_Y + i * 4, + R_028E24_PA_CL_UCP0_Y + i * 16, fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E28_PA_CL_UCP0_Z + i * 4, + R_028E28_PA_CL_UCP0_Z + i * 16, fui(state->ucp[i][2]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, - R_028E2C_PA_CL_UCP0_W + i * 4, + R_028E2C_PA_CL_UCP0_W + i * 16, fui(state->ucp[i][3]), 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, @@ -863,7 +863,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) color_info |= S_0280A0_SOURCE_FORMAT(1); r600_pipe_state_add_reg(rstate, @@ -944,7 +944,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_FRAMEBUFFER; util_copy_framebuffer_state(&rctx->framebuffer, state); - + rctx->pframebuffer = &rctx->framebuffer; /* build states */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 210420e823b..55bc5d0d22b 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -202,10 +202,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, format = &v->hw_format[i]; switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); default:; } v->incompatible_layout = @@ -213,8 +213,7 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, v->elements[i].src_format != v->hw_format[i] || v->elements[i].src_offset % 4 != 0; - v->hw_format_size[i] = - align(util_format_get_blocksize(v->hw_format[i]), 4); + v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); } return v; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index a5ac75736f0..b7600e90eb5 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -109,11 +109,11 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, case V_038000_ARRAY_2D_TILED_THIN1: p_align = MAX2(rscreen->tiling_info->num_banks, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * - rscreen->tiling_info->num_banks)); + rscreen->tiling_info->num_banks)) * 8; break; - case 0: + case V_038000_ARRAY_LINEAR_GENERAL: default: - p_align = 64; + p_align = rscreen->tiling_info->group_bytes / pixsize; break; } return p_align; @@ -139,6 +139,29 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, return h_align; } +static unsigned r600_get_base_alignment(struct pipe_screen *screen, + enum pipe_format format, + unsigned array_mode) +{ + struct r600_screen* rscreen = (struct r600_screen *)screen; + unsigned pixsize = util_format_get_blocksize(format); + int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int h_align = r600_get_height_alignment(screen, array_mode); + int b_align; + + switch (array_mode) { + case V_038000_ARRAY_2D_TILED_THIN1: + b_align = MAX2(rscreen->tiling_info->num_banks * rscreen->tiling_info->num_channels * 8 * 8 * pixsize, + p_align * pixsize * h_align); + break; + case V_038000_ARRAY_1D_TILED_THIN1: + default: + b_align = rscreen->tiling_info->group_bytes; + break; + } + return b_align; +} + static unsigned mip_minify(unsigned size, unsigned level) { unsigned val; @@ -152,11 +175,12 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { + struct r600_screen* rscreen = (struct r600_screen *)screen; struct pipe_resource *ptex = &rtex->resource.base.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); unsigned width, stride, tile_width; - + if (rtex->pitch_override) return rtex->pitch_override; @@ -167,11 +191,6 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, width = align(width, tile_width); } stride = util_format_get_stride(ptex->format, width); - if (chipc == EVERGREEN) - stride = align(stride, 512); - - if (ptex->bind & PIPE_BIND_RENDER_TARGET) - stride = align(stride, 512); return stride; } @@ -257,6 +276,9 @@ static void r600_setup_miptree(struct pipe_screen *screen, } else size = layer_size * u_minify(ptex->depth0, i); + /* align base image and start of miptree */ + if ((i == 0) || (i == 1)) + offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_bytes[i] = pitch; @@ -297,7 +319,10 @@ r600_texture_create_object(struct pipe_screen *screen, resource->size = rtex->size; if (!resource->bo) { - resource->bo = r600_bo(radeon, rtex->size, 4096, base->bind, base->usage); + struct pipe_resource *ptex = &rtex->resource.base.b; + int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); + + resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); if (!resource->bo) { FREE(rtex); return NULL; |