diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_asm.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 96 |
1 files changed, 42 insertions, 54 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 996d9f74a60..45e614977e2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -483,7 +483,9 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || - is_alu_mova_inst(bc, alu); + is_alu_mova_inst(bc, alu) || + (bc->chiprev == CHIPREV_EVERGREEN && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ @@ -525,8 +527,9 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) case CHIPREV_EVERGREEN: default: if (!alu->is_op3) - /* Note that FLT_TO_INT* instructions are vector instructions - * on Evergreen, despite what the documentation says. */ + /* Note that FLT_TO_INT_* instructions are vector-only instructions + * on Evergreen, despite what the documentation says. FLT_TO_INT + * can do both vector and scalar. */ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || @@ -1314,6 +1317,24 @@ static void r600_bc_remove_alu(struct r600_bc_cf *cf, struct r600_bc_alu *alu) cf->ndw -= 2; } +static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + return 8; + + case CHIPREV_R700: + return 16; + + case CHIPREV_EVERGREEN: + return 64; + + default: + R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + return 8; + } +} + int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -1339,7 +1360,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -1386,7 +1407,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -1406,31 +1427,7 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - unsigned fetch_resource_start = 0; - - /* check if we are fetch shader */ - /* fetch shader can also access vertex resource, - * first fetch shader resource is at 160 - */ - if (bc->type == -1) { - switch (bc->chiprev) { - /* r600 */ - case CHIPREV_R600: - /* r700 */ - case CHIPREV_R700: - fetch_resource_start = 160; - break; - /* evergreen */ - case CHIPREV_EVERGREEN: - fetch_resource_start = 0; - break; - default: - fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", - __FILE__, __func__, __LINE__, bc->chiprev); - break; - } - } - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | @@ -1523,6 +1520,14 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } +static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); +} + enum cf_class { CF_CLASS_ALU, @@ -1596,11 +1601,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) break; case CF_CLASS_TEXTURE: case CF_CLASS_VERTEX: - bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | - S_SQ_CF_WORD1_BARRIER(cf->barrier) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) | - S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program); + if (bc->chiprev == CHIPREV_R700) + r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + else + r600_bc_cf_vtx_build(&bc->bytecode[id], cf); break; case CF_CLASS_EXPORT: bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | @@ -2801,22 +2805,6 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -static void r600_cf_vtx(struct r600_vertex_element *ve) -{ - struct r600_pipe_state *rstate; - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} - static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp) { @@ -2987,7 +2975,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.src[0].chan = 3; alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = (1l << 32) / elements[i].instance_divisor + 1; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; alu.dst.sel = i + 1; alu.dst.chan = 3; @@ -3075,9 +3063,9 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bc_clear(&bc); if (rctx->family >= CHIP_CEDAR) - eg_cf_vtx(ve); + evergreen_fetch_shader(ve); else - r600_cf_vtx(ve); + r600_fetch_shader(ve); return 0; } |