diff options
author | Christian König <[email protected]> | 2011-05-07 14:11:40 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2011-05-07 14:11:40 +0200 |
commit | 213b9004a6ee033a16af3dcd187aa68b56c39858 (patch) | |
tree | f577f4dc276f70dad63f102f69f87f4d566186ef /src/gallium/drivers/r600/r600_asm.c | |
parent | 6ad846ee78d9d8ba93dcecdefbf89f2b981333ef (diff) | |
parent | 03615c02d81437cf546609fc6a39c6c73be39360 (diff) |
Merge remote-tracking branch 'origin/master' into pipe-video
Conflicts:
src/gallium/drivers/r600/r600_state.c
Diffstat (limited to 'src/gallium/drivers/r600/r600_asm.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 148 |
1 files changed, 75 insertions, 73 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 7e854b1b81d..033e84665f5 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -33,12 +33,6 @@ #include "r600_formats.h" #include "r600d.h" -#ifdef PIPE_ARCH_BIG_ENDIAN -#define CPU_TO_LE32(x) bswap_32(x) -#else -#define CPU_TO_LE32(x) (x) -#endif - #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 @@ -531,19 +525,19 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, else if (is_alu_vec_unit_inst(bc, alu)) trans = 0; else if (assignment[chan]) - trans = 1; // assume ALU_INST_PREFER_VECTOR + trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */ else trans = 0; if (trans) { if (assignment[4]) { - assert(0); //ALU.Trans has already been allocated + assert(0); /* ALU.Trans has already been allocated. */ return -1; } assignment[4] = alu; } else { if (assignment[chan]) { - assert(0); //ALU.chan has already been allocated + assert(0); /* ALU.chan has already been allocated. */ return -1; } assignment[chan] = alu; @@ -595,7 +589,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, if (bs->hw_gpr[cycle][chan] == -1) bs->hw_gpr[cycle][chan] = sel; else if (bs->hw_gpr[cycle][chan] != (int)sel) { - // Another scalar operation has already used GPR read port for channel + /* Another scalar operation has already used the GPR read port for the channel. */ return -1; } return 0; @@ -615,9 +609,9 @@ static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsign return 0; } else if (bs->hw_cfile_addr[res] == sel && bs->hw_cfile_elem[res] == chan) - return 0; // Read for this scalar element already reserved, nothing to do here. + return 0; /* Read for this scalar element already reserved, nothing to do here. */ } - // All cfile read ports are used, cannot reference vector element + /* All cfile read ports are used, cannot reference vector element. */ return -1; } @@ -632,8 +626,8 @@ static int is_gpr(unsigned sel) static int is_cfile(unsigned sel) { return (sel > 255 && sel < 512) || - (sel > 511 && sel < 4607) || // Kcache before translate - (sel > 127 && sel < 192); // Kcache after translate + (sel > 511 && sel < 4607) || /* Kcache before translation. */ + (sel > 127 && sel < 192); /* Kcache after translation. */ } static int is_const(int sel) @@ -655,8 +649,8 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, if (is_gpr(sel)) { cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) - // Nothing to do; special-case optimization, - // second source uses first source’s reservation + /* Nothing to do; special-case optimization, + * second source uses first source’s reservation. */ continue; else { r = reserve_gpr(bs, sel, elem, cycle); @@ -668,7 +662,7 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, if (r) return r; } - // No restrictions on PV, PS, literal or special constants + /* No restrictions on PV, PS, literal or special constants. */ } return 0; } @@ -682,10 +676,10 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, for (const_count = 0, src = 0; src < num_src; ++src) { sel = alu->src[src].sel; elem = alu->src[src].chan; - if (is_const(sel)) { // Any constant, including literal and inline constants + if (is_const(sel)) { /* Any constant, including literal and inline constants. */ if (const_count >= 2) - // More than two references to a constant in - // transcendental operation. + /* More than two references to a constant in + * transcendental operation. */ return -1; else const_count++; @@ -702,15 +696,19 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, if (is_gpr(sel)) { cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; if (cycle < const_count) - // Cycle for GPR load conflicts with - // constant load in transcendental operation. + /* Cycle for GPR load conflicts with + * constant load in transcendental operation. */ return -1; r = reserve_gpr(bs, sel, elem, cycle); if (r) return r; } - // Constants already processed - // No restrictions on PV, PS + /* PV PS restrictions */ + if (const_count && (sel == 254 || sel == 255)) { + cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; + if (cycle < const_count) + return -1; + } } return 0; } @@ -721,30 +719,36 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - - for (i = 0; i < 5; i++) + boolean scalar_only = true; + for (i = 0; i < 5; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; forced = 1; } - + if (i < 4 && slots[i]) + scalar_only = false; + } if (forced) return 0; - // just check every possible combination of bank swizzle - // not very efficent, but works on the first try in most of the cases + /* Just check every possible combination of bank swizzle. + * Not very efficent, but works on the first try in most of the cases. */ for (i = 0; i < 4; i++) bank_swizzle[i] = SQ_ALU_VEC_012; bank_swizzle[4] = SQ_ALU_SCL_210; while(bank_swizzle[4] <= SQ_ALU_SCL_221) { init_bank_swizzle(&bs); - for (i = 0; i < 4; i++) { - if (slots[i]) { - r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); - if (r) - break; + if (scalar_only == false) { + for (i = 0; i < 4; i++) { + if (slots[i]) { + r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); + if (r) + break; + } } - } + } else + r = 0; + if (!r && slots[4]) { r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); } @@ -756,16 +760,20 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, return 0; } - for (i = 0; i < 5; i++) { - bank_swizzle[i]++; - if (bank_swizzle[i] <= SQ_ALU_VEC_210) - break; - else - bank_swizzle[i] = SQ_ALU_VEC_012; + if (scalar_only) { + bank_swizzle[4]++; + } else { + for (i = 0; i < 5; i++) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } } - // couldn't find a working swizzle + /* Couldn't find a working swizzle. */ return -1; } @@ -835,17 +843,17 @@ void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) case -1: *sel = V_SQ_ALU_SRC_M_1_INT; break; - case 0x3F800000: // 1.0f + case 0x3F800000: /* 1.0f */ *sel = V_SQ_ALU_SRC_1; break; - case 0x3F000000: // 0.5f + case 0x3F000000: /* 0.5f */ *sel = V_SQ_ALU_SRC_0_5; break; - case 0xBF800000: // -1.0f + case 0xBF800000: /* -1.0f */ *sel = V_SQ_ALU_SRC_1; *neg ^= 1; break; - case 0xBF000000: // -0.5f + case 0xBF000000: /* -0.5f */ *sel = V_SQ_ALU_SRC_0_5; *neg ^= 1; break; @@ -938,13 +946,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) return 0; - // let's check used slots + /* Let's check used slots. */ if (prev[i] && !slots[i]) { result[i] = prev[i]; continue; } else if (prev[i] && slots[i]) { if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { - // trans unit is still free try to use it + /* Trans unit is still free try to use it. */ if (is_alu_any_unit_inst(bc, slots[i])) { result[i] = prev[i]; result[4] = slots[i]; @@ -963,14 +971,14 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], alu = slots[i]; num_once_inst += is_alu_once_inst(bc, alu); - // let's check dst gpr + /* Let's check dst gpr. */ if (alu->dst.rel) { if (have_mova) return 0; have_rel = 1; } - // let's check source gprs + /* Let's check source gprs */ num_src = r600_bc_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { @@ -979,7 +987,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], have_rel = 1; } - // constants doesn't matter + /* Constants don't matter. */ if (!is_gpr(alu->src[src].sel)) continue; @@ -987,7 +995,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], if (!prev[j] || !prev[j]->dst.write) continue; - // if it's relative then we can't determin which gpr is really used + /* If it's relative then we can't determin which gpr is really used. */ if (prev[j]->dst.chan == alu->src[src].chan && (prev[j]->dst.sel == alu->src[src].sel || prev[j]->dst.rel || alu->src[src].rel)) @@ -1390,7 +1398,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | - S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | + S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; @@ -1927,7 +1935,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "ENDIAN:%d ", vtx->endian); fprintf(stderr, "OFFSET:%d\n", vtx->offset); - //TODO + /* TODO */ id++; fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); id++; @@ -1960,6 +1968,8 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, } } + *endian = r600_endian_swap(desc->channel[i].size); + switch (desc->channel[i].type) { /* Half-floats, floats, ints */ case UTIL_FORMAT_TYPE_FLOAT: @@ -1977,9 +1987,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16_FLOAT; break; } -#ifdef PIPE_ARCH_BIG_ENDIAN - *endian = ENDIAN_8IN16; -#endif break; case 32: switch (desc->nr_channels) { @@ -1996,9 +2003,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32_FLOAT; break; } -#ifdef PIPE_ARCH_BIG_ENDIAN - *endian = ENDIAN_8IN32; -#endif break; default: goto out_unknown; @@ -2036,9 +2040,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16; break; } -#ifdef PIPE_ARCH_BIG_ENDIAN - *endian = ENDIAN_8IN16; -#endif break; case 32: switch (desc->nr_channels) { @@ -2055,9 +2056,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32; break; } -#ifdef PIPE_ARCH_BIG_ENDIAN - *endian = ENDIAN_8IN32; -#endif break; default: goto out_unknown; @@ -2093,11 +2091,11 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru u32 *bytecode; int i, r; - /* vertex elements offset need special handling, if offset is bigger - + * than what we can put in fetch instruction then we need to alterate - * the vertex resource offset. In such case in order to simplify code - * we will bound one resource per elements. It's a worst case scenario. - */ + /* Vertex element offsets need special handling. If the offset is + * bigger than what we can put in the fetch instruction we need to + * alter the vertex resource offset. In order to simplify code we + * will bind one resource per element in such cases. It's a worst + * case scenario. */ for (i = 0; i < ve->count; i++) { ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset; if (ve->vbuffer_offset[i]) { @@ -2202,8 +2200,12 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return -ENOMEM; } - for(i = 0; i < ve->fs_size / 4; i++) { - *(bytecode + i) = CPU_TO_LE32(*(bc.bytecode + i)); + if (R600_BIG_ENDIAN) { + for (i = 0; i < ve->fs_size / 4; ++i) { + bytecode[i] = bswap_32(bc.bytecode[i]); + } + } else { + memcpy(bytecode, bc.bytecode, ve->fs_size); } r600_bo_unmap(rctx->radeon, ve->fetch_shader); |