author    | Rob Clark <[email protected]> | 2013-05-26 17:13:27 -0400
committer | Rob Clark <[email protected]> | 2013-06-08 13:15:51 -0400
commit    | 18c317b21ddc2ec4538544f9dd69dc568dcf821f (patch)
tree      | 224e3c0464dcedabbf5784e5c3def952473fd8f8 /src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
parent    | 213c207b3ac40ae769afe01b8578f566b5e7840d (diff)
freedreno: prepare for a3xx
Split the parts that are specific to Adreno a2xx-series GPUs from the
parts that will be shared with a3xx, so that a3xx support can be
added more cleanly.
Signed-off-by: Rob Clark <[email protected]>
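
The new ir-a2xx.c below exposes a small builder-style API: ir2_shader_create(), ir2_cf_create(), ir2_instr_create() and ir2_reg_create() build up the IR, and ir2_shader_assemble() turns it into the dword stream described by ir2_shader_info. A minimal sketch of a hypothetical caller is shown here; it is not part of this commit, and the MAXv-as-mov idiom and the use of ~0 to mark the scalar pipe as unused are assumptions read off the emit code in the diff:

#include "ir-a2xx.h"

/* hypothetical caller: assemble a single "mov export0, R0" shader */
static void *assemble_passthrough(struct ir2_shader_info *info)
{
   struct ir2_shader *shader = ir2_shader_create();
   struct ir2_cf *cf = ir2_cf_create(shader, EXEC_END);
   struct ir2_instruction *instr = ir2_instr_create(cf, IR2_ALU);
   void *bin;

   instr->alu.vector_opc = MAXv;   /* MAXv dst, src, src acts as a mov */
   instr->alu.scalar_opc = ~0;     /* scalar pipe unused (assumption) */
   ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);  /* dst: export 0, full write-mask */
   ir2_reg_create(instr, 0, "xyzw", 0);             /* src1: R0 */
   ir2_reg_create(instr, 0, "xyzw", 0);             /* src2: R0 */

   bin = ir2_shader_assemble(shader, info);  /* caller frees the returned buffer */
   ir2_shader_destroy(shader);
   return bin;
}

Note that ir2_shader_assemble() pads the CF list to an even count with a NOP and lays out three dwords per CF pair followed by three dwords per instruction, so info->sizedwords reflects both passes.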
Diffstat (limited to 'src/gallium/drivers/freedreno/a2xx/ir-a2xx.c')
-rw-r--r-- | src/gallium/drivers/freedreno/a2xx/ir-a2xx.c | 635
1 file changed, 635 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
new file mode 100644
index 00000000000..18afba8a5a3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir-a2xx.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "freedreno_util.h"
+#include "instr-a2xx.h"
+
+#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
+#define WARN_MSG(f, ...)  DBG("WARN: "f, ##__VA_ARGS__)
+#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__)
+
+#define REG_MASK 0x3f
+
+static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);
+
+static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
+      uint32_t idx, struct ir2_shader_info *info);
+
+static void reg_update_stats(struct ir2_register *reg,
+      struct ir2_shader_info *info, bool dest);
+static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
+static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
+static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
+static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
+
+/* simple allocator to carve allocations out of an up-front allocated heap,
+ * so that we can free everything easily in one shot.
+ */
+static void * ir2_alloc(struct ir2_shader *shader, int sz)
+{
+   void *ptr = &shader->heap[shader->heap_idx];
+   shader->heap_idx += align(sz, 4);
+   return ptr;
+}
+
+static char * ir2_strdup(struct ir2_shader *shader, const char *str)
+{
+   char *ptr = NULL;
+   if (str) {
+      int len = strlen(str);
+      ptr = ir2_alloc(shader, len+1);
+      memcpy(ptr, str, len);
+      ptr[len] = '\0';
+   }
+   return ptr;
+}
+
+struct ir2_shader * ir2_shader_create(void)
+{
+   DEBUG_MSG("");
+   return calloc(1, sizeof(struct ir2_shader));
+}
+
+void ir2_shader_destroy(struct ir2_shader *shader)
+{
+   DEBUG_MSG("");
+   free(shader);
+}
+
+/* resolve addr/cnt/sequence fields in the individual CF's */
+static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
+{
+   uint32_t addr;
+   unsigned i;
+   int j;
+
+   addr = shader->cfs_count / 2;
+   for (i = 0; i < shader->cfs_count; i++) {
+      struct ir2_cf *cf = shader->cfs[i];
+      if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
+         uint32_t sequence = 0;
+
+         if (cf->exec.addr && (cf->exec.addr != addr))
+            WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
+         if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
+            WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
+
+         for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
+            struct ir2_instruction *instr = cf->exec.instrs[j];
+            sequence <<= 2;
+            if (instr->instr_type == IR2_FETCH)
+               sequence |= 0x1;
+            if (instr->sync)
+               sequence |= 0x2;
+         }
+
+         cf->exec.addr = addr;
+         cf->exec.cnt = cf->exec.instrs_count;
+         cf->exec.sequence = sequence;
+
+         addr += cf->exec.instrs_count;
+      }
+   }
+
+   info->sizedwords = 3 * addr;
+
+   return 0;
+}
+
+void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
+{
+   uint32_t i, j;
+   uint32_t *ptr, *dwords = NULL;
+   uint32_t idx = 0;
+   int ret;
+
+   info->sizedwords = 0;
+   info->max_reg = -1;
+   info->max_input_reg = 0;
+   info->regs_written = 0;
+
+   /* we need an even # of CF's.. insert a NOP if needed */
+   if (shader->cfs_count != align(shader->cfs_count, 2))
+      ir2_cf_create(shader, NOP);
+
+   /* first pass, resolve sizes and addresses: */
+   ret = shader_resolve(shader, info);
+   if (ret) {
+      ERROR_MSG("resolve failed: %d", ret);
+      goto fail;
+   }
+
+   ptr = dwords = calloc(1, 4 * info->sizedwords);
+
+   /* second pass, emit CF program in pairs: */
+   for (i = 0; i < shader->cfs_count; i += 2) {
+      instr_cf_t *cfs = (instr_cf_t *)ptr;
+      ret = cf_emit(shader->cfs[i], &cfs[0]);
+      if (ret) {
+         ERROR_MSG("CF emit failed: %d\n", ret);
+         goto fail;
+      }
+      ret = cf_emit(shader->cfs[i+1], &cfs[1]);
+      if (ret) {
+         ERROR_MSG("CF emit failed: %d\n", ret);
+         goto fail;
+      }
+      ptr += 3;
+      assert((ptr - dwords) <= info->sizedwords);
+   }
+
+   /* third pass, emit ALU/FETCH: */
+   for (i = 0; i < shader->cfs_count; i++) {
+      struct ir2_cf *cf = shader->cfs[i];
+      if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
+         for (j = 0; j < cf->exec.instrs_count; j++) {
+            ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
+            if (ret) {
+               ERROR_MSG("instruction emit failed: %d", ret);
+               goto fail;
+            }
+            ptr += 3;
+            assert((ptr - dwords) <= info->sizedwords);
+         }
+      }
+   }
+
+   return dwords;
+
+fail:
+   free(dwords);
+   return NULL;
+}
+
+
+struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
+{
+   struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf));
+   DEBUG_MSG("%d", cf_type);
+   cf->shader = shader;
+   cf->cf_type = cf_type;
+   assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
+   shader->cfs[shader->cfs_count++] = cf;
+   return cf;
+}
+
+
+/*
+ * CF instructions:
+ */
+
+static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
+{
+   memset(instr, 0, sizeof(*instr));
+
+   instr->opc = cf->cf_type;
+
+   switch (cf->cf_type) {
+   case NOP:
+      break;
+   case EXEC:
+   case EXEC_END:
+      assert(cf->exec.addr <= 0x1ff);
+      assert(cf->exec.cnt <= 0x6);
+      assert(cf->exec.sequence <= 0xfff);
+      instr->exec.address = cf->exec.addr;
+      instr->exec.count = cf->exec.cnt;
+      instr->exec.serialize = cf->exec.sequence;
+      break;
+   case ALLOC:
+      assert(cf->alloc.size <= 0xf);
+      instr->alloc.size = cf->alloc.size;
+      switch (cf->alloc.type) {
+      case SQ_POSITION:
+      case SQ_PARAMETER_PIXEL:
+         instr->alloc.buffer_select = cf->alloc.type;
+         break;
+      default:
+         ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
+         return -1;
+      }
+      break;
+   case COND_EXEC:
+   case COND_EXEC_END:
+   case COND_PRED_EXEC:
+   case COND_PRED_EXEC_END:
+   case LOOP_START:
+   case LOOP_END:
+   case COND_CALL:
+   case RETURN:
+   case COND_JMP:
+   case COND_EXEC_PRED_CLEAN:
+   case COND_EXEC_PRED_CLEAN_END:
+   case MARK_VS_FETCH_DONE:
+      ERROR_MSG("TODO");
+      return -1;
+   }
+
+   return 0;
+}
+
+
+struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
+{
+   struct ir2_instruction *instr =
+         ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
+   DEBUG_MSG("%d", instr_type);
+   instr->shader = cf->shader;
+   instr->pred = cf->shader->pred;
+   instr->instr_type = instr_type;
+   assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
+   cf->exec.instrs[cf->exec.instrs_count++] = instr;
+   return instr;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+static int instr_emit_fetch(struct ir2_instruction *instr,
+      uint32_t *dwords, uint32_t idx,
+      struct ir2_shader_info *info)
+{
+   instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+   int reg = 0;
+   struct ir2_register *dst_reg = instr->regs[reg++];
+   struct ir2_register *src_reg = instr->regs[reg++];
+
+   memset(fetch, 0, sizeof(*fetch));
+
+   reg_update_stats(dst_reg, info, true);
+   reg_update_stats(src_reg, info, false);
+
+   fetch->opc = instr->fetch.opc;
+
+   if (instr->fetch.opc == VTX_FETCH) {
+      instr_fetch_vtx_t *vtx = &fetch->vtx;
+
+      assert(instr->fetch.stride <= 0xff);
+      assert(instr->fetch.fmt <= 0x3f);
+      assert(instr->fetch.const_idx <= 0x1f);
+      assert(instr->fetch.const_idx_sel <= 0x3);
+
+      vtx->src_reg = src_reg->num;
+      vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
+      vtx->dst_reg = dst_reg->num;
+      vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
+      vtx->must_be_one = 1;
+      vtx->const_index = instr->fetch.const_idx;
+      vtx->const_index_sel = instr->fetch.const_idx_sel;
+      vtx->format_comp_all = !!instr->fetch.is_signed;
+      vtx->num_format_all = !instr->fetch.is_normalized;
+      vtx->format = instr->fetch.fmt;
+      vtx->stride = instr->fetch.stride;
+      vtx->offset = instr->fetch.offset;
+
+      if (instr->pred != IR2_PRED_NONE) {
+         vtx->pred_select = 1;
+         vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
+      }
+
+      /* XXX seems like every FETCH but the first has
+       * this bit set:
+       */
+      vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
+      vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
+   } else if (instr->fetch.opc == TEX_FETCH) {
+      instr_fetch_tex_t *tex = &fetch->tex;
+
+      assert(instr->fetch.const_idx <= 0x1f);
+
+      tex->src_reg = src_reg->num;
+      tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
+      tex->dst_reg = dst_reg->num;
+      tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
+      tex->const_idx = instr->fetch.const_idx;
+      tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
+      tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
+      tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
+      tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
+      tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
+      tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
+      tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
+      tex->use_comp_lod = 1;
+      tex->use_reg_lod = !instr->fetch.is_cube;
+      tex->sample_location = SAMPLE_CENTER;
+
+      if (instr->pred != IR2_PRED_NONE) {
+         tex->pred_select = 1;
+         tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
+      }
+
+   } else {
+      ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
+      return -1;
+   }
+
+   return 0;
+}
+
+/*
+ * ALU instructions:
+ */
+
+static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
+      struct ir2_shader_info *info)
+{
+   int reg = 0;
+   instr_alu_t *alu = (instr_alu_t *)dwords;
+   struct ir2_register *dst_reg = instr->regs[reg++];
+   struct ir2_register *src1_reg;
+   struct ir2_register *src2_reg;
+   struct ir2_register *src3_reg;
+
+   memset(alu, 0, sizeof(*alu));
+
+   /* handle instructions w/ 3 src operands: */
+   switch (instr->alu.vector_opc) {
+   case MULADDv:
+   case CNDEv:
+   case CNDGTEv:
+   case CNDGTv:
+   case DOT2ADDv:
+      /* note: disassembler lists 3rd src first, ie:
+       *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
+       * which is the reason for this strange ordering.
+       */
+      src3_reg = instr->regs[reg++];
+      break;
+   default:
+      src3_reg = NULL;
+      break;
+   }
+
+   src1_reg = instr->regs[reg++];
+   src2_reg = instr->regs[reg++];
+
+   reg_update_stats(dst_reg, info, true);
+   reg_update_stats(src1_reg, info, false);
+   reg_update_stats(src2_reg, info, false);
+
+   assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
+   assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
+   assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
+   assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
+   assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
+   assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
+
+   if (instr->alu.vector_opc == ~0) {
+      alu->vector_opc = MAXv;
+      alu->vector_write_mask = 0;
+   } else {
+      alu->vector_opc = instr->alu.vector_opc;
+      alu->vector_write_mask = reg_alu_dst_swiz(dst_reg);
+   }
+
+   alu->vector_dest = dst_reg->num;
+   alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT);
+
+   // TODO predicate case/condition.. need to add to parser
+
+   alu->src2_reg = src2_reg->num;
+   alu->src2_swiz = reg_alu_src_swiz(src2_reg);
+   alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE);
+   alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS);
+   alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST);
+
+   alu->src1_reg = src1_reg->num;
+   alu->src1_swiz = reg_alu_src_swiz(src1_reg);
+   alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE);
+   alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS);
+   alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST);
+
+   alu->vector_clamp = instr->alu.vector_clamp;
+   alu->scalar_clamp = instr->alu.scalar_clamp;
+
+   if (instr->alu.scalar_opc != ~0) {
+      struct ir2_register *sdst_reg = instr->regs[reg++];
+
+      reg_update_stats(sdst_reg, info, true);
+
+      assert(sdst_reg->flags == dst_reg->flags);
+
+      if (src3_reg) {
+         assert(src3_reg == instr->regs[reg++]);
+      } else {
+         src3_reg = instr->regs[reg++];
+      }
+
+      alu->scalar_dest = sdst_reg->num;
+      alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
+      alu->scalar_opc = instr->alu.scalar_opc;
+   } else {
+      /* not sure if this is required, but adreno compiler seems
+       * to always set scalar opc to MAXs if it is not used:
+       */
+      alu->scalar_opc = MAXs;
+   }
+
+   if (src3_reg) {
+      reg_update_stats(src3_reg, info, false);
+
+      alu->src3_reg = src3_reg->num;
+      alu->src3_swiz = reg_alu_src_swiz(src3_reg);
+      alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE);
+      alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS);
+      alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST);
+   } else {
+      /* not sure if this is required, but adreno compiler seems
+       * to always set register bank for 3rd src if unused:
+       */
+      alu->src3_sel = 1;
+   }
+
+   if (instr->pred != IR2_PRED_NONE) {
+      alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
+   }
+
+   return 0;
+}
+
+static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
+      uint32_t idx, struct ir2_shader_info *info)
+{
+   switch (instr->instr_type) {
+   case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
+   case IR2_ALU: return instr_emit_alu(instr, dwords, info);
+   }
+   return -1;
+}
+
+
+struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
+      int num, const char *swizzle, int flags)
+{
+   struct ir2_register *reg =
+         ir2_alloc(instr->shader, sizeof(struct ir2_register));
+   DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
+   assert(num <= REG_MASK);
+   reg->flags = flags;
+   reg->num = num;
+   reg->swizzle = ir2_strdup(instr->shader, swizzle);
+   assert(instr->regs_count < ARRAY_SIZE(instr->regs));
+   instr->regs[instr->regs_count++] = reg;
+   return reg;
+}
+
+static void reg_update_stats(struct ir2_register *reg,
+      struct ir2_shader_info *info, bool dest)
+{
+   if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
+      info->max_reg = MAX2(info->max_reg, reg->num);
+
+      if (dest) {
+         info->regs_written |= (1 << reg->num);
+      } else if (!(info->regs_written & (1 << reg->num))) {
+         /* for registers that haven't been written, they must be an
+          * input register that the thread scheduler (presumably?)
+          * needs to know about:
+          */
+         info->max_input_reg = MAX2(info->max_input_reg, reg->num);
+      }
+   }
+}
+
+static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
+{
+   uint32_t swiz = 0;
+   int i;
+
+   assert(reg->flags == 0);
+   assert(reg->swizzle);
+
+   DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
+
+   for (i = n-1; i >= 0; i--) {
+      swiz <<= 2;
+      switch (reg->swizzle[i]) {
+      default:
+         ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
+      case 'x': swiz |= 0x0; break;
+      case 'y': swiz |= 0x1; break;
+      case 'z': swiz |= 0x2; break;
+      case 'w': swiz |= 0x3; break;
+      }
+   }
+
+   return swiz;
+}
+
+static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
+{
+   uint32_t swiz = 0;
+   int i;
+
+   assert(reg->flags == 0);
+   assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+   DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
+
+   if (reg->swizzle) {
+      for (i = 3; i >= 0; i--) {
+         swiz <<= 3;
+         switch (reg->swizzle[i]) {
+         default:
+            ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+         case 'x': swiz |= 0x0; break;
+         case 'y': swiz |= 0x1; break;
+         case 'z': swiz |= 0x2; break;
+         case 'w': swiz |= 0x3; break;
+         case '0': swiz |= 0x4; break;
+         case '1': swiz |= 0x5; break;
+         case '_': swiz |= 0x7; break;
+         }
+      }
+   } else {
+      swiz = 0x688;
+   }
+
+   return swiz;
+}
+
+/* actually, a write-mask */
+static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
+{
+   uint32_t swiz = 0;
+   int i;
+
+   assert((reg->flags & ~IR2_REG_EXPORT) == 0);
+   assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+   DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
+
+   if (reg->swizzle) {
+      for (i = 3; i >= 0; i--) {
+         swiz <<= 1;
+         if (reg->swizzle[i] == "xyzw"[i]) {
+            swiz |= 0x1;
+         } else if (reg->swizzle[i] != '_') {
+            ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+            break;
+         }
+      }
+   } else {
+      swiz = 0xf;
+   }
+
+   return swiz;
+}
+
+static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
+{
+   uint32_t swiz = 0;
+   int i;
+
+   assert((reg->flags & IR2_REG_EXPORT) == 0);
+   assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+   DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
+
+   if (reg->swizzle) {
+      for (i = 3; i >= 0; i--) {
+         swiz <<= 2;
+         switch (reg->swizzle[i]) {
+         default:
+            ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
+         case 'x': swiz |= (0x0 - i) & 0x3; break;
+         case 'y': swiz |= (0x1 - i) & 0x3; break;
+         case 'z': swiz |= (0x2 - i) & 0x3; break;
+         case 'w': swiz |= (0x3 - i) & 0x3; break;
+         }
+      }
+   } else {
+      swiz = 0x0;
+   }
+
+   return swiz;
+}
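
One detail in reg_alu_src_swiz() that is easy to misread: ALU source swizzles are stored relative to the channel position, i.e. each 2-bit field holds (source channel - position) & 0x3, so the identity swizzle "xyzw" encodes as 0 while a broadcast like "yyyy" does not. A small standalone sketch (assumed helper name, not driver code) that mirrors that encoding:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* mirrors the relative 2-bit-per-channel packing used by reg_alu_src_swiz();
 * swizzle must be exactly four characters drawn from x/y/z/w */
static uint32_t alu_src_swiz(const char *swizzle)
{
   static const char chans[] = "xyzw";
   uint32_t swiz = 0;
   for (int i = 3; i >= 0; i--) {
      uint32_t chan = (uint32_t)(strchr(chans, swizzle[i]) - chans);
      swiz <<= 2;
      swiz |= (chan - (uint32_t)i) & 0x3;
   }
   return swiz;
}

int main(void)
{
   printf("xyzw -> 0x%02x\n", alu_src_swiz("xyzw"));  /* identity encodes as 0x00 */
   printf("yyyy -> 0x%02x\n", alu_src_swiz("yyyy"));  /* broadcast .y */
   printf("wzyx -> 0x%02x\n", alu_src_swiz("wzyx"));  /* reversed order */
   return 0;
}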