diff options
author | Christian König <[email protected]> | 2011-02-28 23:59:53 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2011-02-28 23:59:53 +0100 |
commit | b97e41c7b18c363a303693fb841fe606b1106fe6 (patch) | |
tree | 5329179ac070a8e3ecc3d3e51c020e004f5b32af /src/gallium/drivers/r600 | |
parent | 77217af40d67612d1f1089ca188393d27a8a038f (diff) | |
parent | 5f44fab5a6ba99c287da8d01fa584763bff2565b (diff) |
Merge remote branch 'origin/master' into pipe-video
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 233 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 10 |
10 files changed, 230 insertions, 140 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 4f86e3b4c38..8cb417f9731 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -98,31 +98,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +void eg_cf_vtx(struct r600_vertex_element *ve) { - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; + struct r600_pipe_state *rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 64c52bca795..1b76f0098dd 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -118,10 +118,10 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1393df88757..8006e9b9a58 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -86,6 +86,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -135,6 +136,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -1441,7 +1443,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; } @@ -2778,12 +2781,13 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); - fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format); - fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all); - fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all); - fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all); + fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); + fprintf(stderr, "NUM:%d ", vtx->num_format_all); + fprintf(stderr, "COMP:%d ", vtx->format_comp_all); + fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); id++; - fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET:%d\n", vtx->offset); //TODO id++; fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); @@ -2794,29 +2798,9 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +static void r600_cf_vtx(struct r600_vertex_element *ve) { struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; @@ -2962,37 +2946,19 @@ out_unknown: int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) { - unsigned ndw, i; - u32 *bytecode; - unsigned fetch_resource_start = 0, format, num_format, format_comp; + static int dump_shaders = -1; + + struct r600_bc bc; + struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - - /* 2 dwords for cf aligned to 4 + 4 dwords per input */ - ndw = 8 + ve->count * 4; - ve->fs_size = ndw * 4; - - /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); - if (ve->fetch_shader == NULL) { - return -ENOMEM; - } - - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); - if (bytecode == NULL) { - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); - return -ENOMEM; - } - - if (rctx->family >= CHIP_CEDAR) { - eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - } else { - r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - fetch_resource_start = 160; - } + unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned format, num_format, format_comp; + u32 *bytecode; + int i, r; /* vertex elements offset need special handling, if offset is bigger - * than what we can put in fetch instruction then we need to alterate + + * than what we can put in fetch instruction then we need to alterate * the vertex resource offset. In such case in order to simplify code * we will bound one resource per elements. It's a worst case scenario. */ @@ -3003,40 +2969,155 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } } + memset(&bc, 0, sizeof(bc)); + r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); + if (r) + return r; + + for (i = 0; i < ve->count; i++) { + if (elements[i].instance_divisor > 1) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor); + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + } + } + for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { + r600_bc_clear(&bc); R600_ERR("unknown format %d\n", ve->elements[i].src_format); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); return -EINVAL; } /* see above for vbuffer_need_offset explanation */ vbuffer_index = elements[i].vertex_buffer_index; - if (ve->vbuffer_need_offset) { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); - } else { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); - } - bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | - S_SQ_VTX_WORD0_SRC_SEL_X(0) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | - S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | - S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | - S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | - S_SQ_VTX_WORD1_DATA_FORMAT(format) | - S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | - S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | - S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); - bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); - bytecode[8 + i * 4 + 3] = 0; + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.srf_mode_all = 1; + vtx.offset = elements[i].src_offset; + + if ((r = r600_bc_add_vtx(&bc, &vtx))) { + r600_bc_clear(&bc); + return r; + } + } + + r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + r600_bc_clear(&bc); + return -ENOMEM; + } + + ve->fs_size = bc.ndw*4; + if ((r = r600_bc_build(&bc))) { + r600_bc_clear(&bc); + return r; } + + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bc_dump(&bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bc_clear(&bc); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + memcpy(bytecode, bc.bytecode, ve->fs_size); + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + r600_bc_clear(&bc); + + if (rctx->family >= CHIP_CEDAR) + eg_cf_vtx(ve); + else + r600_cf_vtx(ve); + return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 453c29790c1..dbd1e204b49 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -103,6 +103,7 @@ struct r600_bc_vtx { unsigned num_format_all; unsigned format_comp_all; unsigned srf_mode_all; + unsigned offset; }; struct r600_bc_output { @@ -187,7 +188,7 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); +void eg_cf_vtx(struct r600_vertex_element *ve); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 9865ea17ae5..04408a5cc8e 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -225,7 +225,7 @@ struct texture_orig_info { unsigned height0; }; -static void r600_s3tc_to_blittable(struct pipe_resource *tex, +static void r600_compressed_to_blittable(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -253,7 +253,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex, } -static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, +static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -282,13 +282,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, restore_orig[0] = restore_orig[1] = FALSE; - if (util_format_is_s3tc(src->format)) { - r600_s3tc_to_blittable(src, src_level, &orig_info[0]); + if (util_format_is_compressed(src->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; } - if (util_format_is_s3tc(dst->format)) { - r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); + if (util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); restore_orig[1] = TRUE; /* translate the dst box as well */ dstx = util_format_get_nblocksx(orig_info[1].format, dstx); @@ -299,10 +299,10 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src, src_level, src_box); if (restore_orig[0]) - r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]); + r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); if (restore_orig[1]) - r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]); + r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 0c5d7133c7a..2363cd1ebc5 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx, } static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct radeon *ws = (struct radeon*)pipe->winsys; struct r600_resource_buffer *rbuffer = r600_buffer(resource); @@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.b.b.b.depth0 = 1; rbuffer->r.b.b.b.array_size = 1; rbuffer->r.b.b.b.flags = 0; - rbuffer->r.b.user_ptr = ptr; + rbuffer->r.b.user_ptr = ptr; rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; return &rbuffer->r.b.b.b; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 34094001b75..3fd6668f718 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -77,8 +77,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, u_upload_flush(rctx->vbuf_mgr->uploader); } -static void r600_update_num_contexts(struct r600_screen *rscreen, - int diff) +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) { pipe_mutex_lock(rscreen->mutex_num_contexts); if (diff > 0) { @@ -286,13 +285,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_INSTANCED_DRAWING: return 1; /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - case PIPE_CAP_INSTANCED_DRAWING: return 0; case PIPE_CAP_ARRAY_TEXTURES: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 240c8f1ffd0..0b4dc75e584 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; - /* Would like some magic "get_bool_option_once" routine. - */ - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + /* Would like some magic "get_bool_option_once" routine. + */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); @@ -420,6 +420,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; unsigned i; + int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -451,6 +452,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; + + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = 0; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } + default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); return -EINVAL; @@ -521,6 +542,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { int index; if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && @@ -535,6 +557,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx, index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; @@ -2858,7 +2887,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3016,7 +3045,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 72707fbd8b8..3c072fe7ca9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - } + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } @@ -520,7 +520,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw.info.count; - rdraw.vgt_num_instances = 1; + rdraw.vgt_num_instances = draw.info.instance_count; rdraw.vgt_index_type = vgt_dma_index_type; rdraw.vgt_draw_initiator = vgt_draw_initiator; rdraw.indices = NULL; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 4c9d5609c06..048d0b61e3b 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -292,7 +292,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, bind = PIPE_BIND_RENDER_TARGET; /* hackaround for S3TC */ - if (util_format_is_s3tc(res->format)) + if (util_format_is_compressed(res->format)) return TRUE; if (!screen->is_format_supported(screen, @@ -433,7 +433,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - util_format_is_s3tc(templ->format)) + util_format_is_compressed(templ->format)) array_mode = V_038000_ARRAY_1D_TILED_THIN1; return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, @@ -887,12 +887,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + word4 |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: result = FMT_BC4; goto out_word4; - case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_RGTC2_SNORM: + word4 |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: result = FMT_BC5; goto out_word4; default: |