diff options
Diffstat (limited to 'src/gallium/drivers')
40 files changed, 9778 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am new file mode 100644 index 00000000000..9bb532dc181 --- /dev/null +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -0,0 +1,32 @@ +include $(top_srcdir)/src/gallium/Automake.inc + +noinst_LTLIBRARIES = libfreedreno.la + +AM_CFLAGS = \ + -Wno-packed-bitfield-compat \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_CFLAGS) \ + $(FREEDRENO_CFLAGS) \ + $(PIC_FLAGS) \ + $(VISIBILITY_CFLAGS) + +libfreedreno_la_SOURCES = \ + freedreno_util.c \ + freedreno_fence.c \ + freedreno_resource.c \ + freedreno_surface.c \ + freedreno_vbo.c \ + freedreno_blend.c \ + freedreno_rasterizer.c \ + freedreno_zsa.c \ + freedreno_state.c \ + freedreno_clear.c \ + freedreno_program.c \ + freedreno_texture.c \ + freedreno_context.c \ + freedreno_screen.c \ + freedreno_gmem.c \ + freedreno_compiler.c \ + ir.c \ + disasm.c + diff --git a/src/gallium/drivers/freedreno/disasm.c b/src/gallium/drivers/freedreno/disasm.c new file mode 100644 index 00000000000..ee14ced15df --- /dev/null +++ b/src/gallium/drivers/freedreno/disasm.c @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> + +#include "disasm.h" +#include "instr.h" + +static const char *levels[] = { + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +static enum debug_t debug; + +/* + * ALU instructions: + */ + +static const char chan_names[] = { + 'x', 'y', 'z', 'w', + /* these only apply to FETCH dst's: */ + '0', '1', '?', '_', +}; + +static void print_srcreg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, uint32_t abs) +{ + if (negate) + printf("-"); + if (abs) + printf("|"); + printf("%c%u", type ? 'R' : 'C', num); + if (swiz) { + int i; + printf("."); + for (i = 0; i < 4; i++) { + printf("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } + if (abs) + printf("|"); +} + +static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp) +{ + printf("%s%u", dst_exp ? "export" : "R", num); + if (mask != 0xf) { + int i; + printf("."); + for (i = 0; i < 4; i++) { + printf("%c", (mask & 0x1) ? 
chan_names[i] : '_'); + mask >>= 1; + } + } +} + +static void print_export_comment(uint32_t num, enum shader_t type) +{ + const char *name = NULL; + switch (type) { + case SHADER_VERTEX: + switch (num) { + case 62: name = "gl_Position"; break; + case 63: name = "gl_PointSize"; break; + } + break; + case SHADER_FRAGMENT: + switch (num) { + case 0: name = "gl_FragColor"; break; + } + break; + } + /* if we had a symbol table here, we could look + * up the name of the varying.. + */ + if (name) { + printf("\t; %s", name); + } +} + +struct { + uint32_t num_srcs; + const char *name; +} vector_instructions[0x20] = { +#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc } + INSTR(ADDv, 2), + INSTR(MULv, 2), + INSTR(MAXv, 2), + INSTR(MINv, 2), + INSTR(SETEv, 2), + INSTR(SETGTv, 2), + INSTR(SETGTEv, 2), + INSTR(SETNEv, 2), + INSTR(FRACv, 1), + INSTR(TRUNCv, 1), + INSTR(FLOORv, 1), + INSTR(MULADDv, 3), + INSTR(CNDEv, 3), + INSTR(CNDGTEv, 3), + INSTR(CNDGTv, 3), + INSTR(DOT4v, 2), + INSTR(DOT3v, 2), + INSTR(DOT2ADDv, 3), // ??? 
+ INSTR(CUBEv, 2), + INSTR(MAX4v, 1), + INSTR(PRED_SETE_PUSHv, 2), + INSTR(PRED_SETNE_PUSHv, 2), + INSTR(PRED_SETGT_PUSHv, 2), + INSTR(PRED_SETGTE_PUSHv, 2), + INSTR(KILLEv, 2), + INSTR(KILLGTv, 2), + INSTR(KILLGTEv, 2), + INSTR(KILLNEv, 2), + INSTR(DSTv, 2), + INSTR(MOVAv, 1), +}, scalar_instructions[0x40] = { + INSTR(ADDs, 1), + INSTR(ADD_PREVs, 1), + INSTR(MULs, 1), + INSTR(MUL_PREVs, 1), + INSTR(MUL_PREV2s, 1), + INSTR(MAXs, 1), + INSTR(MINs, 1), + INSTR(SETEs, 1), + INSTR(SETGTs, 1), + INSTR(SETGTEs, 1), + INSTR(SETNEs, 1), + INSTR(FRACs, 1), + INSTR(TRUNCs, 1), + INSTR(FLOORs, 1), + INSTR(EXP_IEEE, 1), + INSTR(LOG_CLAMP, 1), + INSTR(LOG_IEEE, 1), + INSTR(RECIP_CLAMP, 1), + INSTR(RECIP_FF, 1), + INSTR(RECIP_IEEE, 1), + INSTR(RECIPSQ_CLAMP, 1), + INSTR(RECIPSQ_FF, 1), + INSTR(RECIPSQ_IEEE, 1), + INSTR(MOVAs, 1), + INSTR(MOVA_FLOORs, 1), + INSTR(SUBs, 1), + INSTR(SUB_PREVs, 1), + INSTR(PRED_SETEs, 1), + INSTR(PRED_SETNEs, 1), + INSTR(PRED_SETGTs, 1), + INSTR(PRED_SETGTEs, 1), + INSTR(PRED_SET_INVs, 1), + INSTR(PRED_SET_POPs, 1), + INSTR(PRED_SET_CLRs, 1), + INSTR(PRED_SET_RESTOREs, 1), + INSTR(KILLEs, 1), + INSTR(KILLGTs, 1), + INSTR(KILLGTEs, 1), + INSTR(KILLNEs, 1), + INSTR(KILLONEs, 1), + INSTR(SQRT_IEEE, 1), + INSTR(MUL_CONST_0, 1), + INSTR(MUL_CONST_1, 1), + INSTR(ADD_CONST_0, 1), + INSTR(ADD_CONST_1, 1), + INSTR(SUB_CONST_0, 1), + INSTR(SUB_CONST_1, 1), + INSTR(SIN, 1), + INSTR(COS, 1), + INSTR(RETAIN_PREV, 1), +#undef INSTR +}; + +static int disasm_alu(uint32_t *dwords, uint32_t alu_off, + int level, int sync, enum shader_t type) +{ + instr_alu_t *alu = (instr_alu_t *)dwords; + + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + printf("%02x: %08x %08x %08x\t", alu_off, + dwords[0], dwords[1], dwords[2]); + } + + printf(" %sALU:\t", sync ? 
"(S)" : " "); + + printf("%s", vector_instructions[alu->vector_opc].name); + + if (alu->pred_select & 0x2) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf((alu->pred_select & 0x1) ? "EQ" : "NE"); + } + + printf("\t"); + + print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data); + printf(" = "); + if (vector_instructions[alu->vector_opc].num_srcs == 3) { + print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + printf(", "); + } + print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, + alu->src1_reg_negate, alu->src1_reg_abs); + if (vector_instructions[alu->vector_opc].num_srcs > 1) { + printf(", "); + print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, + alu->src2_reg_negate, alu->src2_reg_abs); + } + + if (alu->vector_clamp) + printf(" CLAMP"); + + if (alu->export_data) + print_export_comment(alu->vector_dest, type); + + printf("\n"); + + if (alu->scalar_write_mask || !alu->vector_write_mask) { + /* 2nd optional scalar op: */ + + printf("%s", levels[level]); + if (debug & PRINT_RAW) + printf(" \t"); + + if (scalar_instructions[alu->scalar_opc].name) { + printf("\t \t%s\t", scalar_instructions[alu->scalar_opc].name); + } else { + printf("\t \tOP(%u)\t", alu->scalar_opc); + } + + print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data); + printf(" = "); + print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + // TODO ADD/MUL must have another src?!? 
+ if (alu->scalar_clamp) + printf(" CLAMP"); + if (alu->export_data) + print_export_comment(alu->scalar_dest, type); + printf("\n"); + } + + return 0; +} + + +/* + * FETCH instructions: + */ + +struct { + const char *name; +} fetch_types[0xff] = { +#define TYPE(id) [id] = { #id } + TYPE(FMT_1_REVERSE), + TYPE(FMT_32_FLOAT), + TYPE(FMT_32_32_FLOAT), + TYPE(FMT_32_32_32_FLOAT), + TYPE(FMT_32_32_32_32_FLOAT), + TYPE(FMT_16), + TYPE(FMT_16_16), + TYPE(FMT_16_16_16_16), + TYPE(FMT_8), + TYPE(FMT_8_8), + TYPE(FMT_8_8_8_8), + TYPE(FMT_32), + TYPE(FMT_32_32), + TYPE(FMT_32_32_32_32), +#undef TYPE +}; + +static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz) +{ + int i; + printf("\tR%u.", dst_reg); + for (i = 0; i < 4; i++) { + printf("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +static void print_fetch_vtx(instr_fetch_t *fetch) +{ + instr_fetch_vtx_t *vtx = &fetch->vtx; + + if (vtx->pred_select) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf(vtx->pred_condition ? "EQ" : "NE"); + } + + print_fetch_dst(vtx->dst_reg, vtx->dst_swiz); + printf(" = R%u.", vtx->src_reg); + printf("%c", chan_names[vtx->src_swiz & 0x3]); + if (fetch_types[vtx->format].name) { + printf(" %s", fetch_types[vtx->format].name); + } else { + printf(" TYPE(0x%x)", vtx->format); + } + printf(" %s", vtx->format_comp_all ? 
"SIGNED" : "UNSIGNED"); + if (!vtx->num_format_all) + printf(" NORMALIZED"); + printf(" STRIDE(%u)", vtx->stride); + if (vtx->offset) + printf(" OFFSET(%u)", vtx->offset); + printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); + if (0) { + // XXX + printf(" src_reg_am=%u", vtx->src_reg_am); + printf(" dst_reg_am=%u", vtx->dst_reg_am); + printf(" num_format_all=%u", vtx->num_format_all); + printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); + printf(" exp_adjust_all=%u", vtx->exp_adjust_all); + } +} + +static void print_fetch_tex(instr_fetch_t *fetch) +{ + static const char *filter[] = { + [TEX_FILTER_POINT] = "POINT", + [TEX_FILTER_LINEAR] = "LINEAR", + [TEX_FILTER_BASEMAP] = "BASEMAP", + }; + static const char *aniso_filter[] = { + [ANISO_FILTER_DISABLED] = "DISABLED", + [ANISO_FILTER_MAX_1_1] = "MAX_1_1", + [ANISO_FILTER_MAX_2_1] = "MAX_2_1", + [ANISO_FILTER_MAX_4_1] = "MAX_4_1", + [ANISO_FILTER_MAX_8_1] = "MAX_8_1", + [ANISO_FILTER_MAX_16_1] = "MAX_16_1", + }; + static const char *arbitrary_filter[] = { + [ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM", + [ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM", + [ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM", + [ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM", + [ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM", + [ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM", + }; + static const char *sample_loc[] = { + [SAMPLE_CENTROID] = "CENTROID", + [SAMPLE_CENTER] = "CENTER", + }; + instr_fetch_tex_t *tex = &fetch->tex; + uint32_t src_swiz = tex->src_swiz; + int i; + + if (tex->pred_select) { + /* seems to work similar to conditional execution in ARM instruction + * set, so let's use a similar syntax for now: + */ + printf(tex->pred_condition ? 
"EQ" : "NE"); + } + + print_fetch_dst(tex->dst_reg, tex->dst_swiz); + printf(" = R%u.", tex->src_reg); + for (i = 0; i < 3; i++) { + printf("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + printf(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) + printf(" VALID_ONLY"); + if (tex->tx_coord_denorm) + printf(" DENORM"); + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MAG(%s)", filter[tex->mag_filter]); + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MIN(%s)", filter[tex->min_filter]); + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" MIP(%s)", filter[tex->mip_filter]); + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) + printf(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) + printf(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) + printf(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + if (!tex->use_comp_lod) { + printf(" LOD(%u)", tex->use_comp_lod); + printf(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_gradients) + printf(" USE_REG_GRADIENTS"); + printf(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) + printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); +} + +struct { + const char *name; + void (*fxn)(instr_fetch_t *cf); +} fetch_instructions[] = { +#define INSTR(opc, name, fxn) [opc] = { name, fxn } + INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx), + INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex), + INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex), + INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex), + INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex), + INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex), + INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex), + 
INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex), + INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex), + INSTR(TEX_RESERVED_4, "?", print_fetch_tex), +#undef INSTR +}; + +static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync) +{ + instr_fetch_t *fetch = (instr_fetch_t *)dwords; + + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + printf("%02x: %08x %08x %08x\t", alu_off, + dwords[0], dwords[1], dwords[2]); + } + + printf(" %sFETCH:\t", sync ? "(S)" : " "); + printf("%s", fetch_instructions[fetch->opc].name); + fetch_instructions[fetch->opc].fxn(fetch); + printf("\n"); + + return 0; +} + +/* + * CF instructions: + */ + +static int cf_exec(instr_cf_t *cf) +{ + return (cf->opc == EXEC) || + (cf->opc == EXEC_END) || + (cf->opc == COND_EXEC) || + (cf->opc == COND_EXEC_END) || + (cf->opc == COND_PRED_EXEC) || + (cf->opc == COND_PRED_EXEC_END) || + (cf->opc == COND_EXEC_PRED_CLEAN) || + (cf->opc == COND_EXEC_PRED_CLEAN_END); +} + +static int cf_cond_exec(instr_cf_t *cf) +{ + return (cf->opc == COND_EXEC) || + (cf->opc == COND_EXEC_END) || + (cf->opc == COND_PRED_EXEC) || + (cf->opc == COND_PRED_EXEC_END) || + (cf->opc == COND_EXEC_PRED_CLEAN) || + (cf->opc == COND_EXEC_PRED_CLEAN_END); +} + +static void print_cf_nop(instr_cf_t *cf) +{ +} + +static void print_cf_exec(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count); + if (cf->exec.yeild) + printf(" YIELD"); + if (cf->exec.vc) + printf(" VC(0x%x)", cf->exec.vc); + if (cf->exec.bool_addr) + printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr); + if (cf->exec.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); + if (cf_cond_exec(cf)) + printf(" COND(%d)", cf->exec.condition); +} + +static void print_cf_loop(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id); + if (cf->loop.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); +} + +static void print_cf_jmp_call(instr_cf_t *cf) +{ + printf(" ADDR(0x%x) 
DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction); + if (cf->jmp_call.force_call) + printf(" FORCE_CALL"); + if (cf->jmp_call.predicated_jmp) + printf(" COND(%d)", cf->jmp_call.condition); + if (cf->jmp_call.bool_addr) + printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr); + if (cf->jmp_call.address_mode == ABSOLUTE_ADDR) + printf(" ABSOLUTE_ADDR"); +} + +static void print_cf_alloc(instr_cf_t *cf) +{ + static const char *bufname[] = { + [SQ_NO_ALLOC] = "NO ALLOC", + [SQ_POSITION] = "POSITION", + [SQ_PARAMETER_PIXEL] = "PARAM/PIXEL", + [SQ_MEMORY] = "MEMORY", + }; + printf(" %s SIZE(0x%x)", bufname[cf->alloc.buffer_select], cf->alloc.size); + if (cf->alloc.no_serial) + printf(" NO_SERIAL"); + if (cf->alloc.alloc_mode) // ??? + printf(" ALLOC_MODE"); +} + +struct { + const char *name; + void (*fxn)(instr_cf_t *cf); +} cf_instructions[] = { +#define INSTR(opc, fxn) [opc] = { #opc, fxn } + INSTR(NOP, print_cf_nop), + INSTR(EXEC, print_cf_exec), + INSTR(EXEC_END, print_cf_exec), + INSTR(COND_EXEC, print_cf_exec), + INSTR(COND_EXEC_END, print_cf_exec), + INSTR(COND_PRED_EXEC, print_cf_exec), + INSTR(COND_PRED_EXEC_END, print_cf_exec), + INSTR(LOOP_START, print_cf_loop), + INSTR(LOOP_END, print_cf_loop), + INSTR(COND_CALL, print_cf_jmp_call), + INSTR(RETURN, print_cf_jmp_call), + INSTR(COND_JMP, print_cf_jmp_call), + INSTR(ALLOC, print_cf_alloc), + INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), + INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), + INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? 
+#undef INSTR +}; + +static void print_cf(instr_cf_t *cf, int level) +{ + printf("%s", levels[level]); + if (debug & PRINT_RAW) { + uint16_t *words = (uint16_t *)cf; + printf(" %04x %04x %04x \t", + words[0], words[1], words[2]); + } + printf("%s", cf_instructions[cf->opc].name); + cf_instructions[cf->opc].fxn(cf); + printf("\n"); +} + +/* + * The adreno shader microcode consists of two parts: + * 1) A CF (control-flow) program, at the header of the compiled shader, + * which refers to ALU/FETCH instructions that follow it by address. + * 2) ALU and FETCH instructions + */ + +int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type) +{ + instr_cf_t *cfs = (instr_cf_t *)dwords; + int idx, max_idx; + + for (idx = 0; ; idx++) { + instr_cf_t *cf = &cfs[idx]; + if (cf_exec(cf)) { + max_idx = 2 * cf->exec.address; + break; + } + } + + for (idx = 0; idx < max_idx; idx++) { + instr_cf_t *cf = &cfs[idx]; + + print_cf(cf, level); + + if (cf_exec(cf)) { + uint32_t sequence = cf->exec.serialize; + uint32_t i; + for (i = 0; i < cf->exec.count; i++) { + uint32_t alu_off = (cf->exec.address + i); + if (sequence & 0x1) { + disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2); + } else { + disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type); + } + sequence >>= 2; + } + } + } + + return 0; +} + +void disasm_set_debug(enum debug_t d) +{ + debug= d; +} diff --git a/src/gallium/drivers/freedreno/disasm.h b/src/gallium/drivers/freedreno/disasm.h new file mode 100644 index 00000000000..92efd5ae53c --- /dev/null +++ b/src/gallium/drivers/freedreno/disasm.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
/*
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef DISASM_H_
#define DISASM_H_

/* uint32_t is used in the prototypes below; include it here so the header
 * can be included on its own.
 */
#include <stdint.h>

/* Shader stage being disassembled. */
enum shader_t {
	SHADER_VERTEX,
	SHADER_FRAGMENT,
};

/* bitmask of debug flags */
enum debug_t {
	PRINT_RAW = 0x1,    /* dump raw hexdump */
};

/*
 * Disassemble the shader held in dwords.
 *
 *   dwords     - shader binary
 *   sizedwords - size of the binary, in dwords
 *   level      - indentation level for the output
 *   type       - shader stage
 */
int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type);

/* Select the debug flags (bitmask of enum debug_t values). */
void disasm_set_debug(enum debug_t debug);

#endif /* DISASM_H_ */
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef FREEDRENO_A2XX_REG_H_ +#define FREEDRENO_A2XX_REG_H_ + +#include <GLES2/gl2.h> + +/* convert float to dword */ +static inline uint32_t f2d(float f) +{ + union { + float f; + uint32_t d; + } u = { + .f = f, + }; + return u.d; +} + +/* convert float to 12.4 fixed point */ +static inline uint32_t f2d12_4(float f) +{ + return (uint32_t)(f * 8.0); +} + +/* convert x,y to dword */ +static inline uint32_t xy2d(uint16_t x, uint16_t y) +{ + return ((y & 0x3fff) << 16) | (x & 0x3fff); +} + +/* + * Values for CP_EVENT_WRITE: + */ + +enum VGT_EVENT_TYPE { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, +}; + +/* + * Color/surface formats: + */ + +enum rb_colorformatx { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 
= 13, + COLORX_8_8_8 = 14, + COLORX_INVALID, +}; + +enum sq_surfaceformat { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_10_11_11 = 16, + FMT_11_11_10 = 17, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_24_8 = 22, + FMT_24_8_FLOAT = 23, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_32_AS_8 = 39, + FMT_32_AS_8_8 = 40, + FMT_16_MPEG = 41, + FMT_16_16_MPEG = 42, + FMT_8_INTERLACED = 43, + FMT_32_AS_8_INTERLACED = 44, + FMT_32_AS_8_8_INTERLACED = 45, + FMT_16_INTERLACED = 46, + FMT_16_MPEG_INTERLACED = 47, + FMT_16_16_MPEG_INTERLACED = 48, + FMT_DXN = 49, + FMT_8_8_8_8_AS_16_16_16_16 = 50, + FMT_DXT1_AS_16_16_16_16 = 51, + FMT_DXT2_3_AS_16_16_16_16 = 52, + FMT_DXT4_5_AS_16_16_16_16 = 53, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_11_11_AS_16_16_16_16 = 55, + FMT_11_11_10_AS_16_16_16_16 = 56, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, + FMT_DXT3A_AS_1_1_1_1 = 61, + FMT_INVALID +}; + +/* + * Register addresses: + */ + +#define REG_COHER_BASE_PM4 0xa2a +#define REG_COHER_DEST_BASE_0 0x2006 +#define REG_COHER_SIZE_PM4 0xa29 +#define REG_COHER_STATUS_PM4 0xa2b +#define REG_CP_CSQ_IB1_STAT 0x01fe +#define REG_CP_CSQ_IB2_STAT 0x01ff +#define REG_CP_CSQ_RB_STAT 0x01fd +#define REG_CP_DEBUG 0x01fc +#define REG_CP_IB1_BASE 0x0458 +#define REG_CP_IB1_BUFSZ 0x0459 +#define REG_CP_IB2_BASE 0x045a +#define REG_CP_IB2_BUFSZ 0x045b +#define REG_CP_INT_ACK 0x01f4 +#define 
REG_CP_INT_CNTL 0x01f2 +#define REG_CP_INT_STATUS 0x01f3 +#define REG_CP_ME_CNTL 0x01f6 +#define REG_CP_ME_RAM_DATA 0x01fa +#define REG_CP_ME_RAM_RADDR 0x01f9 +#define REG_CP_ME_RAM_WADDR 0x01f8 +#define REG_CP_ME_STATUS 0x01f7 +#define REG_CP_PERFCOUNTER_HI 0x0447 +#define REG_CP_PERFCOUNTER_LO 0x0446 +#define REG_CP_PERFCOUNTER_SELECT 0x0445 +#define REG_CP_PERFMON_CNTL 0x0444 +#define REG_CP_PFP_UCODE_ADDR 0x00c0 +#define REG_CP_PFP_UCODE_DATA 0x00c1 +#define REG_CP_QUEUE_THRESHOLDS 0x01d5 +#define REG_CP_RB_BASE 0x01c0 +#define REG_CP_RB_CNTL 0x01c1 +#define REG_CP_RB_RPTR 0x01c4 +#define REG_CP_RB_RPTR_ADDR 0x01c3 +#define REG_CP_RB_RPTR_WR 0x01c7 +#define REG_CP_RB_WPTR 0x01c5 +#define REG_CP_RB_WPTR_BASE 0x01c8 +#define REG_CP_RB_WPTR_DELAY 0x01c6 +#define REG_CP_STAT 0x047f +#define REG_CP_STATE_DEBUG_DATA 0x01ed +#define REG_CP_STATE_DEBUG_INDEX 0x01ec +#define REG_CP_ST_BASE 0x044d +#define REG_CP_ST_BUFSZ 0x044e +#define REG_GRAS_DEBUG_CNTL 0x0c80 +#define REG_GRAS_DEBUG_DATA 0x0c81 +#define REG_MASTER_INT_SIGNAL 0x03b7 +#define REG_PA_CL_CLIP_CNTL 0x2204 +#define REG_PA_CL_GB_HORZ_CLIP_ADJ 0x2305 +#define REG_PA_CL_GB_HORZ_DISC_ADJ 0x2306 +#define REG_PA_CL_GB_VERT_CLIP_ADJ 0x2303 +#define REG_PA_CL_GB_VERT_DISC_ADJ 0x2304 +#define REG_PA_CL_VPORT_XOFFSET 0x2110 +#define REG_PA_CL_VPORT_XSCALE 0x210f +#define REG_PA_CL_VPORT_YOFFSET 0x2112 +#define REG_PA_CL_VPORT_YSCALE 0x2111 +#define REG_PA_CL_VPORT_ZOFFSET 0x2114 +#define REG_PA_CL_VPORT_ZSCALE 0x2113 +#define REG_PA_CL_VTE_CNTL 0x2206 +#define REG_PA_SC_AA_CONFIG 0x2301 +#define REG_PA_SC_AA_MASK 0x2312 +#define REG_PA_SC_LINE_CNTL 0x2300 +#define REG_PA_SC_LINE_STIPPLE 0x2283 +#define REG_PA_SC_SCREEN_SCISSOR_BR 0x200f +#define REG_PA_SC_SCREEN_SCISSOR_TL 0x200e +#define REG_PA_SC_VIZ_QUERY 0x2293 +#define REG_PA_SC_VIZ_QUERY_STATUS 0x0c44 +#define REG_PA_SC_WINDOW_OFFSET 0x2080 +#define REG_PA_SC_WINDOW_SCISSOR_BR 0x2082 +#define REG_PA_SC_WINDOW_SCISSOR_TL 0x2081 +#define REG_PA_SU_DEBUG_CNTL 
0x0c80 +#define REG_PA_SU_DEBUG_DATA 0x0c81 +#define REG_PA_SU_FACE_DATA 0x0c86 +#define REG_PA_SU_LINE_CNTL 0x2282 +#define REG_PA_SU_POINT_MINMAX 0x2281 +#define REG_PA_SU_POINT_SIZE 0x2280 +#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET 0x2383 +#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE 0x2380 +#define REG_PA_SU_SC_MODE_CNTL 0x2205 +#define REG_PA_SU_VTX_CNTL 0x2302 +#define REG_PC_DEBUG_CNTL 0x0c38 +#define REG_PC_DEBUG_DATA 0x0c39 +#define REG_RB_ALPHA_REF 0x210e +#define REG_RB_BC_CONTROL 0x0f01 +#define REG_RB_BLEND_ALPHA 0x2108 +#define REG_RB_BLEND_BLUE 0x2107 +#define REG_RB_BLEND_CONTROL 0x2201 +#define REG_RB_BLEND_GREEN 0x2106 +#define REG_RB_BLEND_RED 0x2105 +#define REG_RBBM_CNTL 0x003b +#define REG_RBBM_DEBUG 0x039b +#define REG_RBBM_DEBUG_CNTL 0x03a1 +#define REG_RBBM_DEBUG_OUT 0x03a0 +#define REG_RBBM_INT_ACK 0x03b6 +#define REG_RBBM_INT_CNTL 0x03b4 +#define REG_RBBM_INT_STATUS 0x03b5 +#define REG_RBBM_PATCH_RELEASE 0x0001 +#define REG_RBBM_PERFCOUNTER1_HI 0x0398 +#define REG_RBBM_PERFCOUNTER1_LO 0x0397 +#define REG_RBBM_PERFCOUNTER1_SELECT 0x0395 +#define REG_RBBM_PERIPHID1 0x03f9 +#define REG_RBBM_PERIPHID2 0x03fa +#define REG_RBBM_PM_OVERRIDE1 0x039c +#define REG_RBBM_PM_OVERRIDE2 0x039d +#define REG_RBBM_READ_ERROR 0x03b3 +#define REG_RBBM_SOFT_RESET 0x003c +#define REG_RBBM_STATUS 0x05d0 +#define REG_RB_COLORCONTROL 0x2202 +#define REG_RB_COLOR_DEST_MASK 0x2326 +#define REG_RB_COLOR_INFO 0x2001 +#define REG_RB_COLOR_MASK 0x2104 +#define REG_RB_COPY_CONTROL 0x2318 +#define REG_RB_COPY_DEST_BASE 0x2319 +#define REG_RB_COPY_DEST_INFO 0x231b +#define REG_RB_COPY_DEST_OFFSET 0x231c +#define REG_RB_COPY_DEST_PITCH 0x231a +#define REG_RB_DEBUG_CNTL 0x0f26 +#define REG_RB_DEBUG_DATA 0x0f27 +#define REG_RB_DEPTH_CLEAR 0x231d +#define REG_RB_DEPTHCONTROL 0x2200 +#define REG_RB_DEPTH_INFO 0x2002 +#define REG_RB_EDRAM_INFO 0x0f02 +#define REG_RB_FOG_COLOR 0x2109 +#define REG_RB_MODECONTROL 0x2208 +#define REG_RB_SAMPLE_COUNT_CTL 0x2324 +#define 
REG_RB_SAMPLE_POS 0x220a +#define REG_RB_STENCILREFMASK 0x210d +#define REG_RB_STENCILREFMASK_BF 0x210c +#define REG_RB_SURFACE_INFO 0x2000 +#define REG_SCRATCH_ADDR 0x01dd +#define REG_SCRATCH_REG0 0x0578 +#define REG_SCRATCH_REG2 0x057a +#define REG_SCRATCH_UMSK 0x01dc +#define REG_SQ_CF_BOOLEANS 0x4900 +#define REG_SQ_CF_LOOP 0x4908 +#define REG_SQ_CONSTANT_0 0x4000 +#define REG_SQ_CONTEXT_MISC 0x2181 +#define REG_SQ_DEBUG_CONST_MGR_FSM 0x0daf +#define REG_SQ_DEBUG_EXP_ALLOC 0x0db3 +#define REG_SQ_DEBUG_FSM_ALU_0 0x0db1 +#define REG_SQ_DEBUG_FSM_ALU_1 0x0db2 +#define REG_SQ_DEBUG_GPR_PIX 0x0db6 +#define REG_SQ_DEBUG_GPR_VTX 0x0db5 +#define REG_SQ_DEBUG_INPUT_FSM 0x0dae +#define REG_SQ_DEBUG_MISC_0 0x2309 +#define REG_SQ_DEBUG_MISC 0x0d05 +#define REG_SQ_DEBUG_MISC_1 0x230a +#define REG_SQ_DEBUG_PIX_TB_0 0x0dbc +#define REG_SQ_DEBUG_PIX_TB_STATE_MEM 0x0dc1 +#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x0dbd +#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x0dbe +#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x0dbf +#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x0dc0 +#define REG_SQ_DEBUG_PTR_BUFF 0x0db4 +#define REG_SQ_DEBUG_TB_STATUS_SEL 0x0db7 +#define REG_SQ_DEBUG_TP_FSM 0x0db0 +#define REG_SQ_DEBUG_VTX_TB_0 0x0db8 +#define REG_SQ_DEBUG_VTX_TB_1 0x0db9 +#define REG_SQ_DEBUG_VTX_TB_STATE_MEM 0x0dbb +#define REG_SQ_DEBUG_VTX_TB_STATUS_REG 0x0dba +#define REG_SQ_FETCH_0 0x4800 +#define REG_SQ_FLOW_CONTROL 0x0d01 +#define REG_SQ_GPR_MANAGEMENT 0x0d00 +#define REG_SQ_INST_STORE_MANAGMENT 0x0d02 +#define REG_SQ_INT_ACK 0x0d36 +#define REG_SQ_INT_CNTL 0x0d34 +#define REG_SQ_INTERPOLATOR_CNTL 0x2182 +#define REG_SQ_INT_STATUS 0x0d35 +#define REG_SQ_PROGRAM_CNTL 0x2180 +#define REG_SQ_PS_CONST 0x2308 +#define REG_SQ_PS_PROGRAM 0x21f6 +#define REG_SQ_VS_CONST 0x2307 +#define REG_SQ_VS_PROGRAM 0x21f7 +#define REG_SQ_WRAPPING_0 0x2183 +#define REG_SQ_WRAPPING_1 0x2184 +#define REG_TC_CNTL_STATUS 0x0e00 +#define REG_TP0_CHICKEN 0x0e1e +#define REG_VGT_CURRENT_BIN_ID_MAX 0x2203 +#define 
REG_VGT_CURRENT_BIN_ID_MIN 0x2207 +#define REG_VGT_ENHANCE 0x2294 +#define REG_VGT_INDX_OFFSET 0x2102 +#define REG_VGT_MAX_VTX_INDX 0x2100 +#define REG_VGT_MIN_VTX_INDX 0x2101 +#define REG_VGT_OUT_DEALLOC_CNTL 0x2317 +#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL 0x2316 + +/* Added in a220: */ +#define REG_A220_RB_LRZ_VSC_CONTROL 0x2209 +#define REG_A220_GRAS_CONTROL 0x2210 +#define REG_A220_VSC_BIN_SIZE 0x0c01 +#define REG_A220_VSC_PIPE_DATA_LENGTH_7 0x0c1d +#define REG_VSC_PIPE_CONFIG_0 0x0c06 +#define REG_VSC_PIPE_DATA_ADDRESS_0 0x0c07 +#define REG_VSC_PIPE_DATA_LENGTH_0 0x0c08 +#define REG_VSC_PIPE_CONFIG_1 0x0c09 +#define REG_VSC_PIPE_DATA_ADDRESS_1 0x0c0a +#define REG_VSC_PIPE_DATA_LENGTH_1 0x0c0b +#define REG_VSC_PIPE_CONFIG_2 0x0c0c +#define REG_VSC_PIPE_DATA_ADDRESS_2 0x0c0d +#define REG_VSC_PIPE_DATA_LENGTH_2 0x0c0e +#define REG_VSC_PIPE_CONFIG_3 0x0c0f +#define REG_VSC_PIPE_DATA_ADDRESS_3 0x0c10 +#define REG_VSC_PIPE_DATA_LENGTH_3 0x0c11 +#define REG_VSC_PIPE_CONFIG_4 0x0c12 +#define REG_VSC_PIPE_DATA_ADDRESS_4 0x0c13 +#define REG_VSC_PIPE_DATA_LENGTH_4 0x0c14 +#define REG_VSC_PIPE_CONFIG_5 0x0c15 +#define REG_VSC_PIPE_DATA_ADDRESS_5 0x0c16 +#define REG_VSC_PIPE_DATA_LENGTH_5 0x0c17 +#define REG_VSC_PIPE_CONFIG_6 0x0c18 +#define REG_VSC_PIPE_DATA_ADDRESS_6 0x0c19 +#define REG_VSC_PIPE_DATA_LENGTH_6 0x0c1a +#define REG_VSC_PIPE_CONFIG_7 0x0c1b +#define REG_VSC_PIPE_DATA_ADDRESS_7 0x0c1c +#define REG_VSC_PIPE_DATA_LENGTH_7 0x0c1d + +/* Added in a225: */ +#define REG_A225_RB_COLOR_INFO3 0x2005 +#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103 +#define REG_A225_GRAS_UCP0X 0x2340 +#define REG_A225_GRAS_UCP5W 0x2357 +#define REG_A225_GRAS_UCP_ENABLED 0x2360 + +/* not sure, maybe RB_CLEAR_COLOR? */ +#define REG_CLEAR_COLOR 0x220b + +/* unnamed registers: */ +#define REG_0c02 0x0c02 +#define REG_0c04 0x0c04 +#define REG_0c06 0x0c06 +#define REG_2010 0x2010 + + +/* + * Format for 2nd dword in CP_DRAW_INDX and friends: + */ + +/* see VGT_PRIMITIVE_TYPE.PRIM_TYPE? 
*/ +enum pc_di_primtype { + DI_PT_NONE = 0, + DI_PT_POINTLIST = 1, + DI_PT_LINELIST = 2, + DI_PT_LINESTRIP = 3, + DI_PT_TRILIST = 4, + DI_PT_TRIFAN = 5, + DI_PT_TRISTRIP = 6, + DI_PT_RECTLIST = 8, + DI_PT_QUADLIST = 13, + DI_PT_QUADSTRIP = 14, + DI_PT_POLYGON = 15, + DI_PT_2D_COPY_RECT_LIST_V0 = 16, + DI_PT_2D_COPY_RECT_LIST_V1 = 17, + DI_PT_2D_COPY_RECT_LIST_V2 = 18, + DI_PT_2D_COPY_RECT_LIST_V3 = 19, + DI_PT_2D_FILL_RECT_LIST = 20, + DI_PT_2D_LINE_STRIP = 21, + DI_PT_2D_TRI_STRIP = 22, +}; + +/* see VGT:VGT_DRAW_INITIATOR.SOURCE_SELECT? */ +enum pc_di_src_sel { + DI_SRC_SEL_DMA = 0, + DI_SRC_SEL_IMMEDIATE = 1, + DI_SRC_SEL_AUTO_INDEX = 2, + DI_SRC_SEL_RESERVED = 3, +}; + +/* see VGT_DMA_INDEX_TYPE.INDEX_TYPE? */ +enum pc_di_index_size { + INDEX_SIZE_IGN = 0, + INDEX_SIZE_16_BIT = 0, + INDEX_SIZE_32_BIT = 1, + INDEX_SIZE_8_BIT = 2, + INDEX_SIZE_INVALID +}; + +enum pc_di_vis_cull_mode { + IGNORE_VISIBILITY = 0, +}; + +static inline uint32_t DRAW(enum pc_di_primtype prim_type, + enum pc_di_src_sel source_select, enum pc_di_index_size index_size, + enum pc_di_vis_cull_mode vis_cull_mode) +{ + return (prim_type << 0) | + (source_select << 6) | + ((index_size & 1) << 11) | + ((index_size >> 1) << 13) | + (vis_cull_mode << 9) | + (1 << 14); +} + + +/* + * Bits for VGT_CURRENT_BIN_ID_MIN/MAX: + */ + +#define VGT_CURRENT_BIN_ID_MIN_COLUMN(val) (((val) & 0x7) << 0) +#define VGT_CURRENT_BIN_ID_MIN_ROW(val) (((val) & 0x7) << 3) +#define VGT_CURRENT_BIN_ID_MIN_GUARD_BAND(val) (((val) & 0x7) << 6) + + +/* + * Bits for PA_CL_VTE_CNTL: + */ + +#define PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001 +#define PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002 +#define PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004 +#define PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008 +#define PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010 +#define PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020 +#define PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100 +#define PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200 +#define 
PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400 +#define PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800 + + +/* + * Bits for PA_CL_CLIP_CNTL: + */ + +#define PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000 +enum dx_clip_space { + DXCLIP_OPENGL = 0, + DXCLIP_DIRECTX = 1, +}; +static inline uint32_t PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum dx_clip_space val) +{ + return val << 19; +} +#define PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000 +#define PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000 +#define PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000 +#define PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000 +#define PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000 + + +/* + * Bits for PA_SU_SC_MODE_CNTL: + */ + +#define PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001 +#define PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002 +#define PA_SU_SC_MODE_CNTL_FACE 0x00000004 +enum pa_su_sc_polymode { + POLY_DISABLED = 0, + POLY_DUALMODE = 1, +}; +static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE(enum pa_su_sc_polymode val) +{ + return val << 3; +} +enum pa_su_sc_draw { + DRAW_POINTS = 0, + DRAW_LINES = 1, + DRAW_TRIANGLES = 2, +}; +static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(enum pa_su_sc_draw val) +{ + return val << 5; +} +static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(enum pa_su_sc_draw val) +{ + return val << 8; +} +#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800 +#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000 +#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000 +#define PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000 +#define PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000 +#define PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000 +#define PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000 +#define PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000 +#define PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000 +#define PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000 +#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000 
/* Remaining single-bit flags for PA_SU_SC_MODE_CNTL: */
#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000
#define PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS              0x10000000
#define PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS            0x20000000
#define PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE              0x40000000
#define PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE             0x80000000


/*
 * Bits for PA_SC_LINE_STIPPLE:
 */

/* stipple pattern occupies bits [15:0] */
#define PA_SC_LINE_STIPPLE_LINE_PATTERN(val)   ((val) & 0x0000ffff)
/* repeat count occupies bits [23:16] */
#define PA_SC_LINE_STIPPLE_REPEAT_COUNT(val)   (((val) << 16) & 0x00ff0000)
enum pa_sc_pattern_bit_order {
	PATTERN_BIT_ORDER_LITTLE = 0,
	PATTERN_BIT_ORDER_BIG    = 1,
};
/* Places the pattern bit-order selector at bit 28. */
static inline uint32_t PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum pa_sc_pattern_bit_order val)
{
	return val << 28;
}
enum pa_sc_auto_reset_cntl {
	AUTO_RESET_NEVER          = 0,
	AUTO_RESET_EACH_PRIMITIVE = 1,
	AUTO_RESET_EACH_PACKET    = 2,
};
/* Places the auto-reset selector starting at bit 29. */
static inline uint32_t PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum pa_sc_auto_reset_cntl val)
{
	return val << 29;
}


/*
 * Bits for PA_SC_LINE_CNTL:
 */

#define PA_SC_LINE_CNTL_BRES_CNTL_MASK(val)    ((val) & 0x000000ff)
#define PA_SC_LINE_CNTL_USE_BRES_CNTL          0x00000100
#define PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH      0x00000200
#define PA_SC_LINE_CNTL_LAST_PIXEL             0x00000400


/*
 * Bits for PA_SU_VTX_CNTL:
 */

enum pa_pixcenter {
	PIXCENTER_D3D = 0,
	PIXCENTER_OGL = 1,
};
/* Pixel-center selector is passed through unshifted (bit 0). */
static inline uint32_t PA_SU_VTX_CNTL_PIX_CENTER(enum pa_pixcenter val)
{
	return val;
}

enum pa_roundmode {
	TRUNCATE    = 0,
	ROUND       = 1,
	ROUNDTOEVEN = 2,
	ROUNDTOODD  = 3,
};
/* Places the rounding mode starting at bit 1. */
static inline uint32_t PA_SU_VTX_CNTL_ROUND_MODE_MASK(enum pa_roundmode val)
{
	return val << 1;
}

enum pa_quantmode {
	ONE_SIXTEENTH = 0,
	ONE_EIGHTH    = 1,
	ONE_QUARTER   = 2,
	ONE_HALF      = 3,
	ONE           = 4,
};
/* Places the quantization mode starting at bit 3. */
static inline uint32_t PA_SU_VTX_CNTL_QUANT_MODE(enum pa_quantmode val)
{
	return val << 3;
}


/*
 * Bits for PA_SU_POINT_SIZE:
 *
 * Both fields are 16 bits wide: HEIGHT in bits [15:0], WIDTH in bits
 * [31:16].  The value is first run through f2d12_4(), which is defined
 * outside this header (presumably float -> 12.4 fixed point; confirm
 * against its definition).
 */

#define PA_SU_POINT_SIZE_HEIGHT(val)   (f2d12_4(val) & 0xffff)
/* Mask to 16 bits *before* shifting into the upper half; shifting first
 * and then masking with 0xffff would always evaluate to 0.
 */
#define PA_SU_POINT_SIZE_WIDTH(val)    ((f2d12_4(val) & 0xffff) << 16)


+/* + * Bits for PA_SU_POINT_MINMAX: + */ + +#define PA_SU_POINT_MINMAX_MIN_SIZE(val) (f2d12_4(val) & 0xffff) +#define PA_SU_POINT_MINMAX_MAX_SIZE(val) ((f2d12_4(val) << 16) & 0xffff) + + +/* + * Bits for PA_SU_LINE_CNTL: + */ + +#define PA_SU_LINE_CNTL_WIDTH(val) (f2d12_4(val) & 0xffff) + + +/* + * Bits for PA_SC_WINDOW_OFFSET: + * (seems to be same as r600) + */ +#define PA_SC_WINDOW_OFFSET_X(val) ((val) & 0x7fff) +#define PA_SC_WINDOW_OFFSET_Y(val) (((val) & 0x7fff) << 16) + +#define PA_SC_WINDOW_OFFSET_DISABLE 0x80000000 + + +/* + * Bits for SQ_CONTEXT_MISC: + */ + +#define SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001 +#define SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002 +enum sq_sample_cntl { + CENTROIDS_ONLY = 0, + CENTERS_ONLY = 1, + CENTROIDS_AND_CENTERS = 2, +}; +static inline uint32_t SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum sq_sample_cntl val) +{ + return (val & 0x3) << 2; +} +#define SQ_CONTEXT_MISC_PARAM_GEN_POS(val) (((val) & 0xff) << 8) +#define SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000 +#define SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000 +#define SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 + + +/* + * Bits for SQ_PROGRAM_CNTL: + */ +/* note: only 0x3f worth of valid register values, but high bit is + * set to indicate '0 registers used': + */ +#define SQ_PROGRAM_CNTL_VS_REGS(val) ((val) & 0xff) +#define SQ_PROGRAM_CNTL_PS_REGS(val) (((val) & 0xff) << 8) +#define SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000 +#define SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000 +#define SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000 +#define SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000 +#define SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(val) (((val) & 0xf) << 20) +#define SQ_PROGRAM_CNTL_VS_EXPORT_MODE(val) (((val) & 0x7) << 24) +enum sq_ps_vtx_mode { + POSITION_1_VECTOR = 0, + POSITION_2_VECTORS_UNUSED = 1, + POSITION_2_VECTORS_SPRITE = 2, + POSITION_2_VECTORS_EDGE = 3, + POSITION_2_VECTORS_KILL = 4, + POSITION_2_VECTORS_SPRITE_KILL = 5, + POSITION_2_VECTORS_EDGE_KILL = 6, + MULTIPASS = 7, +}; +static inline 
uint32_t SQ_PROGRAM_CNTL_PS_EXPORT_MODE(enum sq_ps_vtx_mode val) +{ + return val << 27; +} +#define SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000 + + +/* + * Bits for SQ_VS_CONST + */ + +#define SQ_VS_CONST_BASE(val) ((val) & 0x1ff) +#define SQ_VS_CONST_SIZE(val) (((val) & 0x1ff) << 12) + + +/* + * Bits for SQ_PS_CONST + */ + +#define SQ_PS_CONST_BASE(val) ((val) & 0x1ff) +#define SQ_PS_CONST_SIZE(val) (((val) & 0x1ff) << 12) + + +/* + * Bits for tex sampler: + */ + +/* dword0 */ +enum sq_tex_clamp { + SQ_TEX_WRAP = 0, /* GL_REPEAT */ + SQ_TEX_MIRROR = 1, /* GL_MIRRORED_REPEAT */ + SQ_TEX_CLAMP_LAST_TEXEL = 2, /* GL_CLAMP_TO_EDGE */ + /* TODO confirm these: */ + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3, + SQ_TEX_CLAMP_HALF_BORDER = 4, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5, + SQ_TEX_CLAMP_BORDER = 6, + SQ_TEX_MIRROR_ONCE_BORDER = 7, +}; +static inline uint32_t SQ_TEX0_CLAMP_X(enum sq_tex_clamp val) +{ + return (val & 0x7) << 10; +} +static inline uint32_t SQ_TEX0_CLAMP_Y(enum sq_tex_clamp val) +{ + return (val & 0x7) << 13; +} +static inline uint32_t SQ_TEX0_CLAMP_Z(enum sq_tex_clamp val) +{ + return (val & 0x7) << 16; +} +#define SQ_TEX0_PITCH(val) (((val) >> 5) << 22) + +/* dword2 */ +#define SQ_TEX2_HEIGHT(val) (((val) - 1) << 13) +#define SQ_TEX2_WIDTH(val) ((val) - 1) + +/* dword3 */ +enum sq_tex_swiz { + SQ_TEX_X = 0, + SQ_TEX_Y = 1, + SQ_TEX_Z = 2, + SQ_TEX_W = 3, + SQ_TEX_ZERO = 4, + SQ_TEX_ONE = 5, +}; +static inline uint32_t SQ_TEX3_SWIZ_X(enum sq_tex_swiz val) +{ + return (val & 0x7) << 1; +} +static inline uint32_t SQ_TEX3_SWIZ_Y(enum sq_tex_swiz val) +{ + return (val & 0x7) << 4; +} +static inline uint32_t SQ_TEX3_SWIZ_Z(enum sq_tex_swiz val) +{ + return (val & 0x7) << 7; +} +static inline uint32_t SQ_TEX3_SWIZ_W(enum sq_tex_swiz val) +{ + return (val & 0x7) << 10; +} + +enum sq_tex_filter { + SQ_TEX_FILTER_POINT = 0, + SQ_TEX_FILTER_BILINEAR = 1, + SQ_TEX_FILTER_BICUBIC = 2, /* presumed */ +}; +static inline uint32_t SQ_TEX3_XY_MAG_FILTER(enum sq_tex_filter val) +{ 
+ return (val & 0x3) << 19; +} +static inline uint32_t SQ_TEX3_XY_MIN_FILTER(enum sq_tex_filter val) +{ + return (val & 0x3) << 21; +} + + +/* + * Bits for RB_BLEND_CONTROL: + */ + +enum rb_blend_op { + RB_BLEND_ZERO = 0, + RB_BLEND_ONE = 1, + RB_BLEND_SRC_COLOR = 4, + RB_BLEND_ONE_MINUS_SRC_COLOR = 5, + RB_BLEND_SRC_ALPHA = 6, + RB_BLEND_ONE_MINUS_SRC_ALPHA = 7, + RB_BLEND_DST_COLOR = 8, + RB_BLEND_ONE_MINUS_DST_COLOR = 9, + RB_BLEND_DST_ALPHA = 10, + RB_BLEND_ONE_MINUS_DST_ALPHA = 11, + RB_BLEND_CONSTANT_COLOR = 12, + RB_BLEND_ONE_MINUS_CONSTANT_COLOR = 13, + RB_BLEND_CONSTANT_ALPHA = 14, + RB_BLEND_ONE_MINUS_CONSTANT_ALPHA = 15, + RB_BLEND_SRC_ALPHA_SATURATE = 16, +}; + +enum rb_comb_func { + COMB_DST_PLUS_SRC = 0, + COMB_SRC_MINUS_DST = 1, + COMB_MIN_DST_SRC = 2, + COMB_MAX_DST_SRC = 3, + COMB_DST_MINUS_SRC = 4, + COMB_DST_PLUS_SRC_BIAS = 5, +}; + +#define RB_BLENDCONTROL_COLOR_SRCBLEND_MASK 0x0000001f +static inline uint32_t RB_BLENDCONTROL_COLOR_SRCBLEND(enum rb_blend_op val) +{ + return val & RB_BLENDCONTROL_COLOR_SRCBLEND_MASK; +} +#define RB_BLENDCONTROL_COLOR_COMB_FCN_MASK 0x000000e0 +static inline uint32_t RB_BLENDCONTROL_COLOR_COMB_FCN(enum rb_comb_func val) +{ + return (val << 5) & RB_BLENDCONTROL_COLOR_COMB_FCN_MASK; +} +#define RB_BLENDCONTROL_COLOR_DESTBLEND_MASK 0x00001f00 +static inline uint32_t RB_BLENDCONTROL_COLOR_DESTBLEND(enum rb_blend_op val) +{ + return (val << 8) & RB_BLENDCONTROL_COLOR_DESTBLEND_MASK; +} +#define RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK 0x001f0000 +static inline uint32_t RB_BLENDCONTROL_ALPHA_SRCBLEND(enum rb_blend_op val) +{ + return (val << 16) & RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK; +} +#define RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK 0x00e00000 +static inline uint32_t RB_BLENDCONTROL_ALPHA_COMB_FCN(enum rb_comb_func val) +{ + return (val << 21) & RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK; +} +#define RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK 0x1f000000 +static inline uint32_t RB_BLENDCONTROL_ALPHA_DESTBLEND(enum rb_blend_op val) +{ + 
return (val << 24) & RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK; +} +#define RB_BLENDCONTROL_BLEND_FORCE_ENABLE 0x20000000 +#define RB_BLENDCONTROL_BLEND_FORCE 0x40000000 + + +/* + * Bits for RB_COLOR_MASK: + */ +#define RB_COLOR_MASK_WRITE_RED 0x00000001 +#define RB_COLOR_MASK_WRITE_GREEN 0x00000002 +#define RB_COLOR_MASK_WRITE_BLUE 0x00000004 +#define RB_COLOR_MASK_WRITE_ALPHA 0x00000008 + + +/* + * Bits for RB_COLOR_INFO: + */ + +#define RB_COLOR_INFO_COLOR_FORMAT_MASK 0x0000000f +static inline uint32_t RB_COLOR_INFO_COLOR_FORMAT(enum rb_colorformatx val) +{ + return val & RB_COLOR_INFO_COLOR_FORMAT_MASK; +} + +#define RB_COLOR_INFO_COLOR_ROUND_MODE(val) (((val) & 0x3) << 4) +#define RB_COLOR_INFO_COLOR_LINEAR 0x00000040 +#define RB_COLOR_INFO_COLOR_ENDIAN(val) (((val) & 0x3) << 7) +#define RB_COLOR_INFO_COLOR_SWAP(val) (((val) & 0x3) << 9) +#define RB_COLOR_INFO_COLOR_BASE(val) (((val) & 0xfffff) << 12) + + +/* + * Bits for RB_MODECONTROL: + */ + +enum rb_edram_mode { + EDRAM_NOP = 0, + COLOR_DEPTH = 4, + DEPTH_ONLY = 5, + EDRAM_COPY = 6, +}; +static inline uint32_t RB_MODECONTROL_EDRAM_MODE(enum rb_edram_mode val) +{ + return val & 0x7; +} + + +/* + * Bits for RB_DEPTHCONTROL: + */ + +#define RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001 +#define RB_DEPTHCONTROL_Z_ENABLE 0x00000002 +#define RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004 +#define RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008 +#define RB_DEPTHCONTROL_ZFUNC_MASK 0x00000070 +#define RB_DEPTHCONTROL_ZFUNC(depth_func) \ + (((depth_func) << 4) & RB_DEPTHCONTROL_ZFUNC_MASK) +#define RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080 +#define RB_DEPTHCONTROL_STENCILFUNC_MASK 0x00000700 +#define RB_DEPTHCONTROL_STENCILFUNC(depth_func) \ + (((depth_func) << 8) & RB_DEPTHCONTROL_STENCILFUNC_MASK) +enum rb_stencil_op { + STENCIL_KEEP = 0, + STENCIL_ZERO = 1, + STENCIL_REPLACE = 2, + STENCIL_INCR_CLAMP = 3, + STENCIL_DECR_CLAMP = 4, + STENCIL_INVERT = 5, + STENCIL_INCR_WRAP = 6, + STENCIL_DECR_WRAP = 7 +}; +#define 
RB_DEPTHCONTROL_STENCILFAIL_MASK 0x00003800 +static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL(enum rb_stencil_op val) +{ + return (val << 11) & RB_DEPTHCONTROL_STENCILFAIL_MASK; +} +#define RB_DEPTHCONTROL_STENCILZPASS_MASK 0x0001c000 +static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS(enum rb_stencil_op val) +{ + return (val << 14) & RB_DEPTHCONTROL_STENCILZPASS_MASK; +} +#define RB_DEPTHCONTROL_STENCILZFAIL_MASK 0x000e0000 +static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL(enum rb_stencil_op val) +{ + return (val << 17) & RB_DEPTHCONTROL_STENCILZFAIL_MASK; +} +#define RB_DEPTHCONTROL_STENCILFUNC_BF_MASK 0x00700000 +#define RB_DEPTHCONTROL_STENCILFUNC_BF(depth_func) \ + (((depth_func) << 20) & RB_DEPTHCONTROL_STENCILFUNC_BF_MASK) +#define RB_DEPTHCONTROL_STENCILFAIL_BF_MASK 0x03800000 +static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL_BF(enum rb_stencil_op val) +{ + return (val << 23) & RB_DEPTHCONTROL_STENCILFAIL_BF_MASK; +} +#define RB_DEPTHCONTROL_STENCILZPASS_BF_MASK 0x1c000000 +static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS_BF(enum rb_stencil_op val) +{ + return (val << 26) & RB_DEPTHCONTROL_STENCILZPASS_BF_MASK; +} +#define RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK 0xe0000000 +static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL_BF(enum rb_stencil_op val) +{ + return (val << 29) & RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK; +} + + +/* + * Bits for RB_COPY_DEST_INFO: + */ + +enum rb_surface_endian { + ENDIAN_NONE = 0, + ENDIAN_8IN16 = 1, + ENDIAN_8IN32 = 2, + ENDIAN_16IN32 = 3, + ENDIAN_8IN64 = 4, + ENDIAN_8IN128 = 5, +}; +static inline uint32_t RB_COPY_DEST_INFO_DEST_ENDIAN(enum rb_surface_endian val) +{ + return (val & 0x7) << 0; +} +#define RB_COPY_DEST_INFO_LINEAR 0x00000008 +static inline uint32_t RB_COPY_DEST_INFO_FORMAT(enum rb_colorformatx val) +{ + return val << 4; +} +#define RB_COPY_DEST_INFO_SWAP(val) (((val) & 0x3) << 8) /* maybe VGT_DMA_SWAP_MODE? 
*/ +enum rb_dither_mode { + DITHER_DISABLE = 0, + DITHER_ALWAYS = 1, + DITHER_IF_ALPHA_OFF = 2, +}; +static inline uint32_t RB_COPY_DEST_INFO_DITHER_MODE(enum rb_dither_mode val) +{ + return val << 10; +} +enum rb_dither_type { + DITHER_PIXEL = 0, + DITHER_SUBPIXEL = 1, +}; +static inline uint32_t RB_COPY_DEST_INFO_DITHER_TYPE(enum rb_dither_type val) +{ + return val << 12; +} +#define RB_COPY_DEST_INFO_WRITE_RED 0x00004000 +#define RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000 +#define RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000 +#define RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000 + + +/* + * Bits for RB_COPY_DEST_OFFSET: + */ + +#define RB_COPY_DEST_OFFSET_X(val) ((val) & 0x3fff) +#define RB_COPY_DEST_OFFSET_Y(val) (((val) & 0x3fff) << 13) + + +/* + * Bits for RB_COPY_CONTROL: + */ + +#define RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008L +#define RB_COPY_CONTROL_CLEAR_MASK(val) ((val & 0xf) << 4) + + +/* + * Bits for RB_COLORCONTROL: + */ + +#define RB_COLORCONTROL_ALPHA_FUNC(val) ((val) & 0x7) +#define RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008 +#define RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010 +#define RB_COLORCONTROL_BLEND_DISABLE 0x00000020 +#define RB_COLORCONTROL_FOG_ENABLE 0x00000040 +#define RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080 +#define RB_COLORCONTROL_ROP_CODE(val) (((val) & 0xf) << 8) +static inline uint32_t RB_COLORCONTROL_DITHER_MODE(enum rb_dither_mode val) +{ + return (val & 0x3) << 12; +} +static inline uint32_t RB_COLORCONTROL_DITHER_TYPE(enum rb_dither_type val) +{ + return (val & 0x3) << 14; +} +#define RB_COLORCONTROL_PIXEL_FOG 0x00010000 +#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(val) (((val) & 0x3) << 24) +#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(val) (((val) & 0x3) << 26) +#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(val) (((val) & 0x3) << 28) +#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(val) (((val) & 0x3) << 30) + + +/* + * Bits for RB_DEPTH_INFO: + */ + +enum rb_depth_format { + DEPTHX_16 = 0, + DEPTHX_24_8 = 1, + 
DEPTHX_INVALID, +}; + +static inline uint32_t RB_DEPTH_INFO_DEPTH_FORMAT(enum rb_depth_format val) +{ + return val & 0x1; +} +#define RB_DEPTH_INFO_DEPTH_BASE(val) ((val) << 12) + + +/* + * Bits for RB_STENCILREFMASK (RB_STENCILREFMASK_BF is same): + */ + +#define RB_STENCILREFMASK_STENCILREF_MASK 0x000000ff +#define RB_STENCILREFMASK_STENCILREF(val) ((val) & RB_STENCILREFMASK_STENCILREF_MASK) +#define RB_STENCILREFMASK_STENCILMASK_MASK 0x0000ff00 +#define RB_STENCILREFMASK_STENCILMASK(val) (((val) << 8) & RB_STENCILREFMASK_STENCILMASK_MASK) +#define RB_STENCILREFMASK_STENCILWRITEMASK_MASK 0x00ff0000 +#define RB_STENCILREFMASK_STENCILWRITEMASK(val) (((val) << 16) & RB_STENCILREFMASK_STENCILWRITEMASK_MASK) + + +/* + * Bits for RB_BC_CONTROL: + */ + +#define RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001 +#define RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(val) (((val) & 0x3) << 1) +#define RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008 +#define RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010 +#define RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020 +#define RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040 +#define RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080 +#define RB_BC_CONTROL_AZ_THROTTLE_COUNT(val) (((val) & 0x1f) << 8) +#define RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000 +#define RB_BC_CONTROL_CRC_MODE 0x00008000 +#define RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000 +#define RB_BC_CONTROL_DISABLE_ACCUM 0x00020000 +#define RB_BC_CONTROL_ACCUM_ALLOC_MASK(val) (((val) & 0xf) << 18) +#define RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000 +#define RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(val) (((val) & 0xf) << 23) +#define RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(val) (((val) & 0x3) << 27) +#define RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000 +#define RB_BC_CONTROL_CRC_SYSTEM 0x40000000 +#define RB_BC_CONTROL_RESERVED6 0x80000000 + + +/* + * Bits for RBBM_PM_OVERRIDE1: + */ + +#define RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 +#define 
RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 +#define RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 +#define RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 +#define RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 +#define RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 +#define RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 +#define RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 +#define RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 +#define RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 +#define RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 +#define RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 +#define RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 +#define RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 +#define RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 +#define RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 +#define RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 +#define RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 +#define RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 +#define RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 +#define RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 +#define RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 +#define RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 +#define RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 +#define RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 +#define RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 +#define RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 +#define RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 +#define RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 +#define RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 +#define RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 +#define RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 + + +/* + * Bits for RBBM_PM_OVERRIDE2: + */ + +#define 
RBBM_PM_OVERRIDE2_PA_REG_SCLK_PM_OVERRIDE 0x00000001 +#define RBBM_PM_OVERRIDE2_PA_PA_SCLK_PM_OVERRIDE 0x00000002 +#define RBBM_PM_OVERRIDE2_PA_AG_SCLK_PM_OVERRIDE 0x00000004 +#define RBBM_PM_OVERRIDE2_VGT_REG_SCLK_PM_OVERRIDE 0x00000008 +#define RBBM_PM_OVERRIDE2_VGT_FIFOS_SCLK_PM_OVERRIDE 0x00000010 +#define RBBM_PM_OVERRIDE2_VGT_VGT_SCLK_PM_OVERRIDE 0x00000020 +#define RBBM_PM_OVERRIDE2_DEBUG_PERF_SCLK_PM_OVERRIDE 0x00000040 +#define RBBM_PM_OVERRIDE2_PERM_SCLK_PM_OVERRIDE 0x00000080 +#define RBBM_PM_OVERRIDE2_GC_GA_GMEM0_PM_OVERRIDE 0x00000100 +#define RBBM_PM_OVERRIDE2_GC_GA_GMEM1_PM_OVERRIDE 0x00000200 +#define RBBM_PM_OVERRIDE2_GC_GA_GMEM2_PM_OVERRIDE 0x00000400 +#define RBBM_PM_OVERRIDE2_GC_GA_GMEM3_PM_OVERRIDE 0x00000800 + + +/* + * Bits for TC_CNTL_STATUS: + */ + +#define TC_CNTL_STATUS_L2_INVALIDATE 0x00000001 + + +#endif /* FREEDRENO_A2XX_REG_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_blend.c b/src/gallium/drivers/freedreno/freedreno_blend.c new file mode 100644 index 00000000000..c965a736706 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_blend.c @@ -0,0 +1,175 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "freedreno_blend.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +static enum rb_blend_op +blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return RB_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return RB_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return RB_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return RB_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return RB_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return RB_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return RB_BLEND_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return RB_BLEND_CONSTANT_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + case 0: + return RB_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return RB_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return RB_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return RB_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return RB_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return RB_BLEND_ONE_MINUS_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return RB_BLEND_ONE_MINUS_CONSTANT_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + case 
PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* I don't think these are supported */ + default: + DBG("invalid blend factor: %x", factor); + return 0; + } +} + +static enum rb_comb_func +blend_func(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return COMB_DST_PLUS_SRC; + case PIPE_BLEND_MIN: + return COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return COMB_MAX_DST_SRC; + case PIPE_BLEND_SUBTRACT: + return COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return COMB_DST_MINUS_SRC; + default: + DBG("invalid blend func: %x", func); + return 0; + } +} + +static void * +fd_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + const struct pipe_rt_blend_state *rt = &cso->rt[0]; + struct fd_blend_stateobj *so; + + if (cso->logicop_enable) { + DBG("Unsupported! logicop"); + return NULL; + } + + if (cso->independent_blend_enable) { + DBG("Unsupported! independent blend state"); + return NULL; + } + + so = CALLOC_STRUCT(fd_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->rb_colorcontrol = RB_COLORCONTROL_ROP_CODE(12); + + so->rb_blendcontrol = + RB_BLENDCONTROL_COLOR_SRCBLEND(blend_factor(rt->rgb_src_factor)) | + RB_BLENDCONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) | + RB_BLENDCONTROL_COLOR_DESTBLEND(blend_factor(rt->rgb_dst_factor)) | + RB_BLENDCONTROL_ALPHA_SRCBLEND(blend_factor(rt->alpha_src_factor)) | + RB_BLENDCONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) | + RB_BLENDCONTROL_ALPHA_DESTBLEND(blend_factor(rt->alpha_dst_factor)); + + if (rt->colormask & PIPE_MASK_R) + so->rb_colormask |= RB_COLOR_MASK_WRITE_RED; + if (rt->colormask & PIPE_MASK_G) + so->rb_colormask |= RB_COLOR_MASK_WRITE_GREEN; + if (rt->colormask & PIPE_MASK_B) + so->rb_colormask |= RB_COLOR_MASK_WRITE_BLUE; + if (rt->colormask & PIPE_MASK_A) + so->rb_colormask |= RB_COLOR_MASK_WRITE_ALPHA; + + if (!rt->blend_enable) + so->rb_colorcontrol |= RB_COLORCONTROL_BLEND_DISABLE; + + if (cso->dither) + 
so->rb_colorcontrol |= RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS); + + return so; +} + +static void +fd_blend_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->blend = hwcso; + ctx->dirty |= FD_DIRTY_BLEND; +} + +static void +fd_blend_state_delete(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +void +fd_blend_init(struct pipe_context *pctx) +{ + pctx->create_blend_state = fd_blend_state_create; + pctx->bind_blend_state = fd_blend_state_bind; + pctx->delete_blend_state = fd_blend_state_delete; +} + diff --git a/src/gallium/drivers/freedreno/freedreno_blend.h b/src/gallium/drivers/freedreno/freedreno_blend.h new file mode 100644 index 00000000000..70950dfa911 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_blend.h @@ -0,0 +1,44 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_BLEND_H_ +#define FREEDRENO_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd_blend_stateobj { + struct pipe_blend_state base; + uint32_t rb_blendcontrol; + uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */ + uint32_t rb_colormask; +}; + +void fd_blend_init(struct pipe_context *pctx); + +#endif /* FREEDRENO_BLEND_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_clear.c b/src/gallium/drivers/freedreno/freedreno_clear.c new file mode 100644 index 00000000000..04d85ad9198 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_clear.c @@ -0,0 +1,224 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_pack_color.h" + +#include "freedreno_clear.h" +#include "freedreno_context.h" +#include "freedreno_resource.h" +#include "freedreno_state.h" +#include "freedreno_program.h" +#include "freedreno_zsa.h" +#include "freedreno_util.h" + +static uint32_t +pack_rgba(enum pipe_format format, const float *rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + return uc.ui; +} + +static void +fd_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *fb = &ctx->framebuffer.base; + uint32_t reg, colr = 0; + + ctx->cleared |= buffers; + ctx->resolve |= buffers; + ctx->needs_flush = true; + + if (buffers & PIPE_CLEAR_COLOR) + fd_resource(fb->cbufs[0]->texture)->dirty = true; + + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) + fd_resource(fb->zsbuf->texture)->dirty = true; + + DBG("depth=%f, stencil=%u", depth, stencil); + + if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs) + colr = pack_rgba(fb->cbufs[0]->format, color->f); + + /* emit generic state now: */ + fd_state_emit(pctx, ctx->dirty & + (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT | + FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR)); + + fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) { + { .prsc = ctx->solid_vertexbuf, .size = 48 }, + }, 1); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET)); + OUT_RING(ring, 0); + + 
OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); + + fd_program_emit(ring, &ctx->solid_prog); + + OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1); + OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_CLEAR_COLOR)); + OUT_RING(ring, colr); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL)); + OUT_RING(ring, 0x00000084); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL)); + reg = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + reg |= RB_COPY_CONTROL_CLEAR_MASK(0xf) | + RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE; + } + OUT_RING(ring, reg); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_DEPTH_CLEAR)); + reg = 0; + if (fb->zsbuf) { + switch (fd_pipe2depth(fb->zsbuf->format)) { + case DEPTHX_24_8: + reg = (((uint32_t)(0xffffff * depth)) << 8) | + (stencil & 0xff); + break; + case DEPTHX_16: + reg = (uint32_t)(0xffffffff * depth); + break; + } + } + OUT_RING(ring, reg); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL)); + reg = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + reg |= RB_DEPTHCONTROL_ZFUNC(GL_ALWAYS) | + RB_DEPTHCONTROL_Z_ENABLE | + RB_DEPTHCONTROL_Z_WRITE_ENABLE | + RB_DEPTHCONTROL_EARLY_Z_ENABLE; + } + if (buffers & PIPE_CLEAR_STENCIL) { + reg |= RB_DEPTHCONTROL_STENCILFUNC(GL_ALWAYS) | + RB_DEPTHCONTROL_STENCIL_ENABLE | + RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); + } + OUT_RING(ring, reg); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL)); + OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */ + OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */ + PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) | + PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK)); + 
OUT_RING(ring, 0x0000ffff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */ + OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */ + fb->height)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO)); + OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(1) | + RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(fb->cbufs[0]->format))); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK)); + if (buffers & PIPE_CLEAR_COLOR) { + OUT_RING(ring, RB_COLOR_MASK_WRITE_RED | + RB_COLOR_MASK_WRITE_GREEN | + RB_COLOR_MASK_WRITE_BLUE | + RB_COLOR_MASK_WRITE_ALPHA); + } else { + OUT_RING(ring, 0x0); + } + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 3); /* NumIndices */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL)); + OUT_RING(ring, 0x00000000); + + ctx->dirty |= FD_DIRTY_ZSA | + FD_DIRTY_RASTERIZER | + FD_DIRTY_SAMPLE_MASK | + FD_DIRTY_PROG | + FD_DIRTY_CONSTBUF | + FD_DIRTY_BLEND; +} + +static void +fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h); +} + +static void +fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u", + buffers, depth, stencil, x, y, w, h); +} + +void +fd_clear_init(struct pipe_context *pctx) +{ + pctx->clear = fd_clear; + pctx->clear_render_target = 
fd_clear_render_target; + pctx->clear_depth_stencil = fd_clear_depth_stencil; +} diff --git a/src/gallium/drivers/freedreno/freedreno_clear.h b/src/gallium/drivers/freedreno/freedreno_clear.h new file mode 100644 index 00000000000..31bb0377d0c --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_clear.h @@ -0,0 +1,37 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_CLEAR_H_ +#define FREEDRENO_CLEAR_H_ + +#include "pipe/p_context.h" + +void fd_clear_init(struct pipe_context *pctx); + + +#endif /* FREEDRENO_CLEAR_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c new file mode 100644 index 00000000000..0610902a896 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_compiler.c @@ -0,0 +1,1186 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_strings.h" +#include "tgsi/tgsi_dump.h" + +#include "freedreno_program.h" +#include "freedreno_compiler.h" +#include "freedreno_util.h" + +#include "instr.h" +#include "ir.h" + +struct fd_compile_context { + struct fd_program_stateobj *prog; + struct fd_shader_stateobj *so; + + struct tgsi_parse_context parser; + unsigned type; + + /* predicate stack: */ + int pred_depth; + enum ir_pred pred_stack[8]; + + /* Internal-Temporary and Predicate register assignment: + * + * Some TGSI instructions which translate into multiple actual + * instructions need one or more temporary registers (which are not + * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). + * Whenever possible, the dst register is used as the first temporary, + * but this is not possible when the dst register is in an export (ie. + * in TGSI_FILE_OUTPUT). + * + * The predicate register must be valid across multiple TGSI + * instructions, but internal temporary's do not. For this reason, + * once the predicate register is requested, until it is no longer + * needed, it gets the first register slot after after the TGSI + * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the + * internal temporaries get the register slots above this. + */ + + int pred_reg; + int num_internal_temps; + + uint8_t num_regs[TGSI_FILE_COUNT]; + + /* maps input register idx to prog->export_linkage idx: */ + uint8_t input_export_idx[64]; + + /* maps output register idx to prog->export_linkage idx: */ + uint8_t output_export_idx[64]; + + /* idx/slot for last compiler generated immediate */ + unsigned immediate_idx; + + // TODO we can skip emit exports in the VS that the FS doesn't need.. + // and get rid perhaps of num_param.. 
+ unsigned num_position, num_param; + unsigned position, psize; + + uint64_t need_sync; + + /* current exec CF instruction */ + struct ir_cf *cf; +}; + +static int +semantic_idx(struct tgsi_declaration_semantic *semantic) +{ + int idx = semantic->Name; + if (idx == TGSI_SEMANTIC_GENERIC) + idx = TGSI_SEMANTIC_COUNT + semantic->Index; + return idx; +} + +/* assign/get the input/export register # for given semantic idx as + * returned by semantic_idx(): + */ +static int +export_linkage(struct fd_compile_context *ctx, int idx) +{ + struct fd_program_stateobj *prog = ctx->prog; + + /* if first time we've seen this export, assign the next available slot: */ + if (prog->export_linkage[idx] == 0xff) + prog->export_linkage[idx] = prog->num_exports++; + + return prog->export_linkage[idx]; +} + +static unsigned +compile_init(struct fd_compile_context *ctx, struct fd_program_stateobj *prog, + struct fd_shader_stateobj *so) +{ + unsigned ret; + + ctx->prog = prog; + ctx->so = so; + ctx->cf = NULL; + ctx->pred_depth = 0; + + ret = tgsi_parse_init(&ctx->parser, so->tokens); + if (ret != TGSI_PARSE_OK) + return ret; + + ctx->type = ctx->parser.FullHeader.Processor.Processor; + ctx->position = ~0; + ctx->psize = ~0; + ctx->num_position = 0; + ctx->num_param = 0; + ctx->need_sync = 0; + ctx->immediate_idx = 0; + ctx->pred_reg = -1; + ctx->num_internal_temps = 0; + + memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); + memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); + memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); + + /* do first pass to extract declarations: */ + while (!tgsi_parse_end_of_tokens(&ctx->parser)) { + tgsi_parse_token(&ctx->parser); + + switch (ctx->parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = + &ctx->parser.FullToken.FullDeclaration; + if (decl->Declaration.File == TGSI_FILE_OUTPUT) { + unsigned name = decl->Semantic.Name; + + assert(decl->Declaration.Semantic); // TODO 
is this ever not true? + + ctx->output_export_idx[decl->Range.First] = + semantic_idx(&decl->Semantic); + + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + switch (name) { + case TGSI_SEMANTIC_POSITION: + ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; + ctx->num_position++; + break; + case TGSI_SEMANTIC_PSIZE: + ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; + ctx->num_position++; + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_GENERIC: + ctx->num_param++; + break; + default: + DBG("unknown VS semantic name: %s", + tgsi_semantic_names[name]); + assert(0); + } + } else { + switch (name) { + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_GENERIC: + ctx->num_param++; + break; + default: + DBG("unknown PS semantic name: %s", + tgsi_semantic_names[name]); + assert(0); + } + } + } else if (decl->Declaration.File == TGSI_FILE_INPUT) { + ctx->input_export_idx[decl->Range.First] = + semantic_idx(&decl->Semantic); + } + ctx->num_regs[decl->Declaration.File] += + 1 + decl->Range.Last - decl->Range.First; + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: { + struct tgsi_full_immediate *imm = + &ctx->parser.FullToken.FullImmediate; + unsigned n = ctx->so->num_immediates++; + memcpy(ctx->so->immediates[n].val, imm->u, 16); + break; + } + default: + break; + } + } + + /* TGSI generated immediates are always entire vec4's, ones we + * generate internally are not: + */ + ctx->immediate_idx = ctx->so->num_immediates * 4; + + ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; + + tgsi_parse_free(&ctx->parser); + + return tgsi_parse_init(&ctx->parser, so->tokens); +} + +static void +compile_free(struct fd_compile_context *ctx) +{ + tgsi_parse_free(&ctx->parser); +} + +static struct ir_cf * +next_exec_cf(struct fd_compile_context *ctx) +{ + struct ir_cf *cf = ctx->cf; + if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) + ctx->cf = cf = ir_cf_create(ctx->so->ir, EXEC); + return cf; +} + +static void +compile_vtx_fetch(struct fd_compile_context *ctx) +{ + struct 
ir_instruction **vfetch_instrs = ctx->so->vfetch_instrs; + int i; + for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { + struct ir_instruction *instr = ir_instr_create( + next_exec_cf(ctx), IR_FETCH); + instr->fetch.opc = VTX_FETCH; + + ctx->need_sync |= 1 << (i+1); + + ir_reg_create(instr, i+1, "xyzw", 0); + ir_reg_create(instr, 0, "x", 0); + + if (i == 0) + instr->sync = true; + + vfetch_instrs[i] = instr; + } + ctx->so->num_vfetch_instrs = i; + ctx->cf = NULL; +} + +/* + * For vertex shaders (VS): + * --- ------ ------------- + * + * Inputs: R1-R(num_input) + * Constants: C0-C(num_const-1) + * Immediates: C(num_const)-C(num_const+num_imm-1) + * Outputs: export0-export(n) and export62, export63 + * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) + * Temps: R(num_input+1)-R(num_input+num_temps) + * + * R0 could be clobbered after the vertex fetch instructions.. so we + * could use it for one of the temporaries. + * + * TODO: maybe the vertex fetch part could fetch first input into R0 as + * the last vtx fetch instruction, which would let us use the same + * register layout in either case.. although this is not what the blob + * compiler does. + * + * + * For frag shaders (PS): + * --- ---- ------------- + * + * Inputs: R0-R(num_input-1) + * Constants: same as VS + * Immediates: same as VS + * Outputs: export0-export(num_outputs) + * Temps: R(num_input)-R(num_input+num_temps-1) + * + * In either case, immediates are are postpended to the constants + * (uniforms). 
+ * + */ + +static unsigned +get_temp_gpr(struct fd_compile_context *ctx, int idx) +{ + unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; + if (ctx->type == TGSI_PROCESSOR_VERTEX) + num++; + return num; +} + +static struct ir_register * +add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, + const struct tgsi_dst_register *dst) +{ + unsigned flags = 0, num = 0; + char swiz[5]; + + switch (dst->File) { + case TGSI_FILE_OUTPUT: + flags |= IR_REG_EXPORT; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + if (dst->Index == ctx->position) { + num = 62; + } else if (dst->Index == ctx->psize) { + num = 63; + } else { + num = export_linkage(ctx, + ctx->output_export_idx[dst->Index]); + } + } else { + num = dst->Index; + } + break; + case TGSI_FILE_TEMPORARY: + num = get_temp_gpr(ctx, dst->Index); + break; + default: + DBG("unsupported dst register file: %s", + tgsi_file_names[dst->File]); + assert(0); + break; + } + + swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; + swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; + swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; + swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 
'w' : '_'; + swiz[4] = '\0'; + + return ir_reg_create(alu, num, swiz, flags); +} + +static struct ir_register * +add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu, + const struct tgsi_src_register *src) +{ + static const char swiz_vals[] = { + 'x', 'y', 'z', 'w', + }; + char swiz[5]; + unsigned flags = 0, num = 0; + + switch (src->File) { + case TGSI_FILE_CONSTANT: + num = src->Index; + flags |= IR_REG_CONST; + break; + case TGSI_FILE_INPUT: + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + num = src->Index + 1; + } else { + num = export_linkage(ctx, + ctx->input_export_idx[src->Index]); + } + break; + case TGSI_FILE_TEMPORARY: + num = get_temp_gpr(ctx, src->Index); + break; + case TGSI_FILE_IMMEDIATE: + num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; + flags |= IR_REG_CONST; + break; + default: + DBG("unsupported src register file: %s", + tgsi_file_names[src->File]); + assert(0); + break; + } + + if (src->Absolute) + flags |= IR_REG_ABS; + if (src->Negate) + flags |= IR_REG_NEGATE; + + swiz[0] = swiz_vals[src->SwizzleX]; + swiz[1] = swiz_vals[src->SwizzleY]; + swiz[2] = swiz_vals[src->SwizzleZ]; + swiz[3] = swiz_vals[src->SwizzleW]; + swiz[4] = '\0'; + + if ((ctx->need_sync & (uint64_t)(1 << num)) && + !(flags & IR_REG_CONST)) { + alu->sync = true; + ctx->need_sync &= ~(uint64_t)(1 << num); + } + + return ir_reg_create(alu, num, swiz, flags); +} + +static void +add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + alu->alu.vector_clamp = true; + break; + case TGSI_SAT_MINUS_PLUS_ONE: + DBG("unsupported saturate"); + assert(0); + break; + } +} + +static void +add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + alu->alu.scalar_clamp = true; + break; + case TGSI_SAT_MINUS_PLUS_ONE: + 
DBG("unsupported saturate"); + assert(0); + break; + } +} + +static void +add_regs_vector_1(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + assert(inst->Instruction.NumSrcRegs == 1); + assert(inst->Instruction.NumDstRegs == 1); + + add_dst_reg(ctx, alu, &inst->Dst[0].Register); + add_src_reg(ctx, alu, &inst->Src[0].Register); + add_src_reg(ctx, alu, &inst->Src[0].Register); + add_vector_clamp(inst, alu); +} + +static void +add_regs_vector_2(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + assert(inst->Instruction.NumSrcRegs == 2); + assert(inst->Instruction.NumDstRegs == 1); + + add_dst_reg(ctx, alu, &inst->Dst[0].Register); + add_src_reg(ctx, alu, &inst->Src[0].Register); + add_src_reg(ctx, alu, &inst->Src[1].Register); + add_vector_clamp(inst, alu); +} + +static void +add_regs_vector_3(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + assert(inst->Instruction.NumSrcRegs == 3); + assert(inst->Instruction.NumDstRegs == 1); + + add_dst_reg(ctx, alu, &inst->Dst[0].Register); + /* maybe should re-arrange the syntax some day, but + * in assembler/disassembler and what ir.c expects + * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 + */ + add_src_reg(ctx, alu, &inst->Src[2].Register); + add_src_reg(ctx, alu, &inst->Src[0].Register); + add_src_reg(ctx, alu, &inst->Src[1].Register); + add_vector_clamp(inst, alu); +} + +static void +add_regs_dummy_vector(struct ir_instruction *alu) +{ + /* create dummy, non-written vector dst/src regs + * for unused vector instr slot: + */ + ir_reg_create(alu, 0, "____", 0); /* vector dst */ + ir_reg_create(alu, 0, NULL, 0); /* vector src1 */ + ir_reg_create(alu, 0, NULL, 0); /* vector src2 */ +} + +static void +add_regs_scalar_1(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, struct ir_instruction *alu) +{ + assert(inst->Instruction.NumSrcRegs == 1); + 
assert(inst->Instruction.NumDstRegs == 1); + + add_regs_dummy_vector(alu); + + add_dst_reg(ctx, alu, &inst->Dst[0].Register); + add_src_reg(ctx, alu, &inst->Src[0].Register); + add_scalar_clamp(inst, alu); +} + +/* + * Helpers for TGSI instructions that don't map to a single shader instr: + */ + +/* Get internal-temp src/dst to use for a sequence of instructions + * generated by a single TGSI op.. if possible, use the final dst + * register as the temporary to avoid allocating a new register, but + * if necessary allocate one. If a single TGSI op needs multiple + * internal temps, pass NULL for orig_dst for all but the first one + * so that you don't end up using the same register for all your + * internal temps. + */ +static bool +get_internal_temp(struct fd_compile_context *ctx, + struct tgsi_dst_register *orig_dst, + struct tgsi_dst_register *tmp_dst, + struct tgsi_src_register *tmp_src) +{ + bool using_temp = false; + + tmp_dst->File = TGSI_FILE_TEMPORARY; + tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; + tmp_dst->Indirect = 0; + tmp_dst->Dimension = 0; + + if (orig_dst && (orig_dst->File != TGSI_FILE_OUTPUT)) { + /* if possible, use orig dst register for the temporary: */ + tmp_dst->Index = orig_dst->Index; + } else { + /* otherwise assign one: */ + int n = ctx->num_internal_temps++; + if (ctx->pred_reg != -1) + n++; + tmp_dst->Index = get_temp_gpr(ctx, + ctx->num_regs[TGSI_FILE_TEMPORARY] + n); + using_temp = true; + } + + tmp_src->File = tmp_dst->File; + tmp_src->Indirect = tmp_dst->Indirect; + tmp_src->Dimension = tmp_dst->Dimension; + tmp_src->Index = tmp_dst->Index; + tmp_src->Absolute = 0; + tmp_src->Negate = 0; + tmp_src->SwizzleX = TGSI_SWIZZLE_X; + tmp_src->SwizzleY = TGSI_SWIZZLE_Y; + tmp_src->SwizzleZ = TGSI_SWIZZLE_Z; + tmp_src->SwizzleW = TGSI_SWIZZLE_W; + + return using_temp; +} + +static void +get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst, + struct tgsi_src_register *src) +{ + assert(ctx->pred_reg != -1); + + dst->File 
= TGSI_FILE_TEMPORARY; + dst->WriteMask = TGSI_WRITEMASK_W; + dst->Indirect = 0; + dst->Dimension = 0; + dst->Index = get_temp_gpr(ctx, ctx->pred_reg); + + if (src) { + src->File = dst->File; + src->Indirect = dst->Indirect; + src->Dimension = dst->Dimension; + src->Index = dst->Index; + src->Absolute = 0; + src->Negate = 0; + src->SwizzleX = TGSI_SWIZZLE_W; + src->SwizzleY = TGSI_SWIZZLE_W; + src->SwizzleZ = TGSI_SWIZZLE_W; + src->SwizzleW = TGSI_SWIZZLE_W; + } +} + +static void +push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src) +{ + struct ir_instruction *alu; + struct tgsi_dst_register pred_dst; + + /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by + * themselves: + */ + ctx->cf = NULL; + + if (ctx->pred_depth == 0) { + /* assign predicate register: */ + ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; + + get_predicate(ctx, &pred_dst, NULL); + + alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); + add_regs_dummy_vector(alu); + add_dst_reg(ctx, alu, &pred_dst); + add_src_reg(ctx, alu, src); + } else { + struct tgsi_src_register pred_src; + + get_predicate(ctx, &pred_dst, &pred_src); + + alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + add_dst_reg(ctx, alu, &pred_dst); + add_src_reg(ctx, alu, &pred_src); + add_src_reg(ctx, alu, src); + + // XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make + // sure src reg is valid if it was calculated with a predicate + // condition.. 
+ alu->pred = IR_PRED_NONE; + } + + /* save previous pred state to restore in pop_predicate(): */ + ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; + + ctx->cf = NULL; +} + +static void +pop_predicate(struct fd_compile_context *ctx) +{ + /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by + * themselves: + */ + ctx->cf = NULL; + + /* restore previous predicate state: */ + ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; + + if (ctx->pred_depth != 0) { + struct ir_instruction *alu; + struct tgsi_dst_register pred_dst; + struct tgsi_src_register pred_src; + + get_predicate(ctx, &pred_dst, &pred_src); + + alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); + add_regs_dummy_vector(alu); + add_dst_reg(ctx, alu, &pred_dst); + add_src_reg(ctx, alu, &pred_src); + alu->pred = IR_PRED_NONE; + } else { + /* predicate register no longer needed: */ + ctx->pred_reg = -1; + } + + ctx->cf = NULL; +} + +static void +get_immediate(struct fd_compile_context *ctx, + struct tgsi_src_register *reg, uint32_t val) +{ + unsigned neg, swiz, idx, i; + /* actually maps 1:1 currently.. 
not sure if that is safe to rely on: */ + static const unsigned swiz2tgsi[] = { + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, + }; + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == val) { + neg = 0; + break; + } + + if (ctx->so->immediates[idx].val[swiz] == -val) { + neg = 1; + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + neg = 0; + ctx->so->immediates[idx].val[swiz] = val; + ctx->so->num_immediates = idx + 1; + ctx->immediate_idx++; + } + + reg->File = TGSI_FILE_IMMEDIATE; + reg->Indirect = 0; + reg->Dimension = 0; + reg->Index = idx; + reg->Absolute = 0; + reg->Negate = neg; + reg->SwizzleX = swiz2tgsi[swiz]; + reg->SwizzleY = swiz2tgsi[swiz]; + reg->SwizzleZ = swiz2tgsi[swiz]; + reg->SwizzleW = swiz2tgsi[swiz]; +} + +/* POW(a,b) = EXP2(b * LOG2(a)) */ +static void +translate_pow(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct ir_instruction *alu; + + get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + + alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); + add_regs_dummy_vector(alu); + add_dst_reg(ctx, alu, &tmp_dst); + add_src_reg(ctx, alu, &inst->Src[0].Register); + + alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + add_dst_reg(ctx, alu, &tmp_dst); + add_src_reg(ctx, alu, &tmp_src); + add_src_reg(ctx, alu, &inst->Src[1].Register); + + /* NOTE: some of the instructions, like EXP_IEEE, seem hard- + * coded to take their input from the w component. 
+ */ + switch(inst->Dst[0].Register.WriteMask) { + case TGSI_WRITEMASK_X: + tmp_src.SwizzleW = TGSI_SWIZZLE_X; + break; + case TGSI_WRITEMASK_Y: + tmp_src.SwizzleW = TGSI_SWIZZLE_Y; + break; + case TGSI_WRITEMASK_Z: + tmp_src.SwizzleW = TGSI_SWIZZLE_Z; + break; + case TGSI_WRITEMASK_W: + tmp_src.SwizzleW = TGSI_SWIZZLE_W; + break; + default: + DBG("invalid writemask!"); + assert(0); + break; + } + + alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); + add_regs_dummy_vector(alu); + add_dst_reg(ctx, alu, &inst->Dst[0].Register); + add_src_reg(ctx, alu, &tmp_src); + add_scalar_clamp(inst, alu); +} + +static void +translate_tex(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, unsigned opc) +{ + struct ir_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + const struct tgsi_src_register *coord; + bool using_temp; + int idx; + + using_temp = get_internal_temp(ctx, + &inst->Dst[0].Register, &tmp_dst, &tmp_src); + + if (opc == TGSI_OPCODE_TXP) { + /* TXP - Projective Texture Lookup: + * + * coord.x = src0.x / src.w + * coord.y = src0.y / src.w + * coord.z = src0.z / src.w + * coord.w = src0.w + * bias = 0.0 + * + * dst = texture_sample(unit, coord, bias) + */ + instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); + + /* MAXv: */ + add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; + add_src_reg(ctx, instr, &inst->Src[0].Register); + add_src_reg(ctx, instr, &inst->Src[0].Register); + + /* RECIP_IEEE: */ + add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; + add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww"; + + instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; + add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; + add_src_reg(ctx, instr, &inst->Src[0].Register); + + coord = &tmp_src; + } else { + coord = &inst->Src[0].Register; + } + + instr = ir_instr_create(next_exec_cf(ctx), IR_FETCH); + instr->fetch.opc 
= TEX_FETCH; + assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? + + /* save off the tex fetch to be patched later with correct const_idx: */ + idx = ctx->so->num_tfetch_instrs++; + ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; + ctx->so->tfetch_instrs[idx].instr = instr; + + add_dst_reg(ctx, instr, &tmp_dst); + add_src_reg(ctx, instr, coord); + + /* dst register needs to be marked for sync: */ + ctx->need_sync |= 1 << instr->regs[0]->num; + + /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */ + instr->sync = true; + + if (using_temp) { + /* texture fetch can't write directly to export, so if tgsi + * is telling us the dst register is in output file, we load + * the texture to a temp and the use ALU instruction to move + * to output + */ + instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); + + add_dst_reg(ctx, instr, &inst->Dst[0].Register); + add_src_reg(ctx, instr, &tmp_src); + add_src_reg(ctx, instr, &tmp_src); + add_vector_clamp(inst, instr); + } +} + +/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ +/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ +static void +translate_sge_slt(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, unsigned opc) +{ + struct ir_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register tmp_const; + float c0, c1; + + switch (opc) { + default: + assert(0); + case TGSI_OPCODE_SGE: + c0 = 1.0; + c1 = 0.0; + break; + case TGSI_OPCODE_SLT: + c0 = 0.0; + c1 = 1.0; + break; + } + + get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + + instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + add_dst_reg(ctx, instr, &tmp_dst); + add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE; + add_src_reg(ctx, instr, &inst->Src[1].Register); + + instr = ir_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0); + add_dst_reg(ctx, instr, &inst->Dst[0].Register); + /* maybe 
should re-arrange the syntax some day, but + * in assembler/disassembler and what ir.c expects + * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 + */ + get_immediate(ctx, &tmp_const, f2d(c0)); + add_src_reg(ctx, instr, &tmp_const); + add_src_reg(ctx, instr, &tmp_src); + get_immediate(ctx, &tmp_const, f2d(c1)); + add_src_reg(ctx, instr, &tmp_const); +} + +/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ +static void +translate_lrp(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, + unsigned opc) +{ + struct ir_instruction *instr; + struct tgsi_dst_register tmp_dst1, tmp_dst2; + struct tgsi_src_register tmp_src1, tmp_src2; + struct tgsi_src_register tmp_const; + + get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1); + get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2); + + get_immediate(ctx, &tmp_const, f2d(1.0)); + + /* tmp1 = (a * b) */ + instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + add_dst_reg(ctx, instr, &tmp_dst1); + add_src_reg(ctx, instr, &inst->Src[0].Register); + add_src_reg(ctx, instr, &inst->Src[1].Register); + + /* tmp2 = (1 - a) */ + instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + add_dst_reg(ctx, instr, &tmp_dst2); + add_src_reg(ctx, instr, &tmp_const); + add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE; + + /* tmp2 = tmp2 * c */ + instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0); + add_dst_reg(ctx, instr, &tmp_dst2); + add_src_reg(ctx, instr, &tmp_src2); + add_src_reg(ctx, instr, &inst->Src[2].Register); + + /* dst = tmp1 + tmp2 */ + instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); + add_dst_reg(ctx, instr, &inst->Dst[0].Register); + add_src_reg(ctx, instr, &tmp_src1); + add_src_reg(ctx, instr, &tmp_src2); +} + +static void +translate_trig(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst, + unsigned opc) +{ + struct ir_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register 
tmp_const; + instr_scalar_opc_t op; + + switch (opc) { + default: + assert(0); + case TGSI_OPCODE_SIN: + op = SIN; + break; + case TGSI_OPCODE_COS: + op = COS; + break; + } + + get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + + tmp_dst.WriteMask = TGSI_WRITEMASK_X; + tmp_src.SwizzleX = tmp_src.SwizzleY = + tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; + + /* maybe should re-arrange the syntax some day, but + * in assembler/disassembler and what ir.c expects + * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 + */ + instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + add_dst_reg(ctx, instr, &tmp_dst); + get_immediate(ctx, &tmp_const, f2d(0.5)); + add_src_reg(ctx, instr, &tmp_const); + add_src_reg(ctx, instr, &inst->Src[0].Register); + get_immediate(ctx, &tmp_const, f2d(0.159155)); + add_src_reg(ctx, instr, &tmp_const); + + instr = ir_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); + add_dst_reg(ctx, instr, &tmp_dst); + add_src_reg(ctx, instr, &tmp_src); + add_src_reg(ctx, instr, &tmp_src); + + instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); + add_dst_reg(ctx, instr, &tmp_dst); + get_immediate(ctx, &tmp_const, f2d(-3.141593)); + add_src_reg(ctx, instr, &tmp_const); + add_src_reg(ctx, instr, &tmp_src); + get_immediate(ctx, &tmp_const, f2d(6.283185)); + add_src_reg(ctx, instr, &tmp_const); + + instr = ir_instr_create_alu(next_exec_cf(ctx), ~0, op); + add_regs_dummy_vector(instr); + add_dst_reg(ctx, instr, &inst->Dst[0].Register); + add_src_reg(ctx, instr, &tmp_src); +} + +/* + * Main part of compiler/translator: + */ + +static void +translate_instruction(struct fd_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + unsigned opc = inst->Instruction.Opcode; + struct ir_instruction *instr; + static struct ir_cf *cf; + + if (opc == TGSI_OPCODE_END) + return; + + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + unsigned num = inst->Dst[0].Register.Index; + /* seems like we need to ensure that position vs 
param/pixel + * exports don't end up in the same EXEC clause.. easy way + * to do this is force a new EXEC clause on first appearance + * of an position or param/pixel export. + */ + if ((num == ctx->position) || (num == ctx->psize)) { + if (ctx->num_position > 0) { + ctx->cf = NULL; + ir_cf_create_alloc(ctx->so->ir, SQ_POSITION, + ctx->num_position - 1); + ctx->num_position = 0; + } + } else { + if (ctx->num_param > 0) { + ctx->cf = NULL; + ir_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, + ctx->num_param - 1); + ctx->num_param = 0; + } + } + } + + cf = next_exec_cf(ctx); + + /* TODO turn this into a table: */ + switch (opc) { + case TGSI_OPCODE_MOV: + instr = ir_instr_create_alu(cf, MAXv, ~0); + add_regs_vector_1(ctx, inst, instr); + break; + case TGSI_OPCODE_RCP: + instr = ir_instr_create_alu(cf, ~0, RECIP_IEEE); + add_regs_scalar_1(ctx, inst, instr); + break; + case TGSI_OPCODE_RSQ: + instr = ir_instr_create_alu(cf, ~0, RECIPSQ_IEEE); + add_regs_scalar_1(ctx, inst, instr); + break; + case TGSI_OPCODE_MUL: + instr = ir_instr_create_alu(cf, MULv, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_ADD: + instr = ir_instr_create_alu(cf, ADDv, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_DP3: + instr = ir_instr_create_alu(cf, DOT3v, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_DP4: + instr = ir_instr_create_alu(cf, DOT4v, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_MIN: + instr = ir_instr_create_alu(cf, MINv, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_MAX: + instr = ir_instr_create_alu(cf, MAXv, ~0); + add_regs_vector_2(ctx, inst, instr); + break; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + translate_sge_slt(ctx, inst, opc); + break; + case TGSI_OPCODE_MAD: + instr = ir_instr_create_alu(cf, MULADDv, ~0); + add_regs_vector_3(ctx, inst, instr); + break; + case TGSI_OPCODE_LRP: + translate_lrp(ctx, inst, opc); + break; + case 
TGSI_OPCODE_FRC: + instr = ir_instr_create_alu(cf, FRACv, ~0); + add_regs_vector_1(ctx, inst, instr); + break; + case TGSI_OPCODE_FLR: + instr = ir_instr_create_alu(cf, FLOORv, ~0); + add_regs_vector_1(ctx, inst, instr); + break; + case TGSI_OPCODE_EX2: + instr = ir_instr_create_alu(cf, ~0, EXP_IEEE); + add_regs_scalar_1(ctx, inst, instr); + break; + case TGSI_OPCODE_POW: + translate_pow(ctx, inst); + break; + case TGSI_OPCODE_ABS: + instr = ir_instr_create_alu(cf, MAXv, ~0); + add_regs_vector_1(ctx, inst, instr); + instr->regs[1]->flags |= IR_REG_NEGATE; /* src0 */ + break; + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + translate_trig(ctx, inst, opc); + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + translate_tex(ctx, inst, opc); + break; + case TGSI_OPCODE_CMP: + instr = ir_instr_create_alu(cf, CNDGTEv, ~0); + add_regs_vector_3(ctx, inst, instr); + // TODO this should be src0 if regs where in sane order.. + instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */ + break; + case TGSI_OPCODE_IF: + push_predicate(ctx, &inst->Src[0].Register); + ctx->so->ir->pred = IR_PRED_EQ; + break; + case TGSI_OPCODE_ELSE: + ctx->so->ir->pred = IR_PRED_NE; + /* not sure if this is required in all cases, but blob compiler + * won't combine EQ and NE in same CF: + */ + ctx->cf = NULL; + break; + case TGSI_OPCODE_ENDIF: + pop_predicate(ctx); + break; + case TGSI_OPCODE_F2I: + instr = ir_instr_create_alu(cf, TRUNCv, ~0); + add_regs_vector_1(ctx, inst, instr); + break; + default: + DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); + tgsi_dump(ctx->so->tokens, 0); + assert(0); + break; + } + + /* internal temporaries are only valid for the duration of a single + * TGSI instruction: + */ + ctx->num_internal_temps = 0; +} + +static void +compile_instructions(struct fd_compile_context *ctx) +{ + while (!tgsi_parse_end_of_tokens(&ctx->parser)) { + tgsi_parse_token(&ctx->parser); + + switch (ctx->parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + 
translate_instruction(ctx, + &ctx->parser.FullToken.FullInstruction); + break; + default: + break; + } + } + + ctx->cf->cf_type = EXEC_END; +} + +int +fd_compile_shader(struct fd_program_stateobj *prog, + struct fd_shader_stateobj *so) +{ + struct fd_compile_context ctx; + + ir_shader_destroy(so->ir); + so->ir = ir_shader_create(); + so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; + + if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) + return -1; + + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + compile_vtx_fetch(&ctx); + } else if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { + prog->num_exports = 0; + memset(prog->export_linkage, 0xff, + sizeof(prog->export_linkage)); + } + + compile_instructions(&ctx); + + compile_free(&ctx); + + return 0; +} + diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.h b/src/gallium/drivers/freedreno/freedreno_compiler.h new file mode 100644 index 00000000000..ce09788c10b --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_compiler.h @@ -0,0 +1,38 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_COMPILER_H_ +#define FREEDRENO_COMPILER_H_ + +#include "freedreno_program.h" +#include "freedreno_util.h" + +int fd_compile_shader(struct fd_program_stateobj *prog, + struct fd_shader_stateobj *so); + +#endif /* FREEDRENO_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c new file mode 100644 index 00000000000..cac10b70c9c --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -0,0 +1,205 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "freedreno_context.h" +#include "freedreno_vbo.h" +#include "freedreno_blend.h" +#include "freedreno_rasterizer.h" +#include "freedreno_zsa.h" +#include "freedreno_state.h" +#include "freedreno_resource.h" +#include "freedreno_clear.h" +#include "freedreno_program.h" +#include "freedreno_texture.h" +#include "freedreno_gmem.h" +#include "freedreno_util.h" + +/* there are two cases where we currently need to wait for render complete: + * 1) pctx->flush() .. since at the moment we have no way for DDX to sync + * the presentation blit with the 3d core + * 2) wrap-around for ringbuffer.. possibly we can do something more + * Intelligent here. Right now we need to ensure there is enough room + * at the end of the drawcmds in the cmdstream buffer for all the per- + * tile cmds. We do this the lamest way possible, by making the ringbuffer + * big, and flushing and resetting back to the beginning if we get too + * close to the end. 
+ */ +static void +fd_context_wait(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + uint32_t ts = fd_ringbuffer_timestamp(ctx->ring); + + DBG("wait: %u", ts); + + fd_pipe_wait(ctx->screen->pipe, ts); + fd_ringbuffer_reset(ctx->ring); + fd_ringmarker_mark(ctx->draw_start); +} + +/* emit accumulated render cmds, needed for example if render target has + * changed, or for flush() + */ +void +fd_context_render(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + struct pipe_framebuffer_state *fb = &ctx->framebuffer.base; + + DBG("needs_flush: %d", ctx->needs_flush); + + if (!ctx->needs_flush) + return; + + fd_gmem_render_tiles(pctx); + + DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end); + + /* if size in dwords is more than half the buffer size, then wait and + * wrap around: + */ + if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8) + fd_context_wait(pctx); + + ctx->needs_flush = false; + ctx->cleared = ctx->restore = ctx->resolve = 0; + + fd_resource(fb->cbufs[0]->texture)->dirty = false; + if (fb->zsbuf) + fd_resource(fb->zsbuf->texture)->dirty = false; +} + +static void +fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + enum pipe_flush_flags flags) +{ + DBG("fence=%p", fence); + +#if 0 + if (fence) { + fd_fence_ref(ctx->screen->fence.current, + (struct fd_fence **)fence); + } +#endif + + fd_context_render(pctx); + fd_context_wait(pctx); +} + +static void +fd_context_destroy(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + DBG(""); + + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); + + fd_ringmarker_del(ctx->draw_start); + fd_ringmarker_del(ctx->draw_end); + fd_ringbuffer_del(ctx->ring); + + fd_prog_fini(pctx); + + FREE(ctx); +} + +static struct pipe_resource * +create_solid_vertexbuf(struct pipe_context *pctx) +{ + static const float init_shader_const[] = { + /* for clear/gmem2mem: */ + -1.000000, +1.000000, +1.000000, 
+1.100000, + +1.000000, +1.000000, -1.000000, -1.100000, + +1.000000, +1.100000, -1.100000, +1.000000, + /* for mem2gmem: (vertices) */ + -1.000000, +1.000000, +1.000000, +1.000000, + +1.000000, +1.000000, -1.000000, -1.000000, + +1.000000, +1.000000, -1.000000, +1.000000, + /* for mem2gmem: (tex coords) */ + +0.000000, +0.000000, +1.000000, +0.000000, + +0.000000, +1.000000, +1.000000, +1.000000, + }; + struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, + PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); + pipe_buffer_write(pctx, prsc, 0, + sizeof(init_shader_const), init_shader_const); + return prsc; +} + +struct pipe_context * +fd_context_create(struct pipe_screen *pscreen, void *priv) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd_context *ctx = CALLOC_STRUCT(fd_context); + struct pipe_context *pctx; + + if (!ctx) + return NULL; + + DBG(""); + + ctx->screen = screen; + + ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000); + ctx->draw_start = fd_ringmarker_new(ctx->ring); + ctx->draw_end = fd_ringmarker_new(ctx->ring); + + pctx = &ctx->base; + pctx->screen = pscreen; + pctx->priv = priv; + pctx->flush = fd_context_flush; + pctx->destroy = fd_context_destroy; + + util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + + fd_vbo_init(pctx); + fd_blend_init(pctx); + fd_rasterizer_init(pctx); + fd_zsa_init(pctx); + fd_state_init(pctx); + fd_resource_context_init(pctx); + fd_clear_init(pctx); + fd_prog_init(pctx); + fd_texture_init(pctx); + + ctx->blitter = util_blitter_create(pctx); + if (!ctx->blitter) { + fd_context_destroy(pctx); + return NULL; + } + + /* construct vertex state used for solid ops (clear, and gmem<->mem) */ + ctx->solid_vertexbuf = create_solid_vertexbuf(pctx); + + fd_state_emit_setup(pctx); + + return pctx; +} diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h new file mode 100644 index 
00000000000..6fff8f611b4 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -0,0 +1,184 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_CONTEXT_H_ +#define FREEDRENO_CONTEXT_H_ + +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "util/u_blitter.h" +#include "util/u_slab.h" +#include "util/u_string.h" + +#include "freedreno_screen.h" + +struct fd_blend_stateobj; +struct fd_rasterizer_stateobj; +struct fd_zsa_stateobj; +struct fd_sampler_stateobj; +struct fd_vertex_stateobj; +struct fd_shader_stateobj; + +struct fd_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct fd_sampler_stateobj *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + unsigned dirty_samplers; +}; + +struct fd_program_stateobj { + struct fd_shader_stateobj *vp, *fp; + enum { + FD_SHADER_DIRTY_VP = (1 << 0), + FD_SHADER_DIRTY_FP = (1 << 1), + } dirty; + uint8_t num_exports; + /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index + * for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point- + * size) are not included in this + */ + uint8_t export_linkage[63]; +}; + +struct fd_constbuf_stateobj { + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct fd_vertexbuf_stateobj { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct fd_framebuffer_stateobj { + struct pipe_framebuffer_state base; + uint16_t bin_h, nbins_y; + uint16_t bin_w, nbins_x; + uint32_t pa_su_sc_mode_cntl; +}; + +struct fd_context { + struct pipe_context base; + + struct fd_screen *screen; + struct blitter_context *blitter; + + struct util_slab_mempool transfer_pool; + + /* shaders used by clear, and gmem->mem blits: */ + struct fd_program_stateobj solid_prog; // TODO move to screen? + + /* shaders used by mem->gmem blits: */ + struct fd_program_stateobj blit_prog; // TODO move to screen? 
+ + /* vertex buff used for clear/gmem->mem vertices, and mem->gmem + * vertices and tex coords: + */ + struct pipe_resource *solid_vertexbuf; + + /* do we need to mem2gmem before rendering. We don't, if for example, + * there was a glClear() that invalidated the entire previous buffer + * contents. Keep track of which buffer(s) are cleared, or needs + * restore. Masks of PIPE_CLEAR_* + */ + enum { + /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */ + FD_BUFFER_COLOR = PIPE_CLEAR_COLOR, + FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, + FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, + FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, + } cleared, restore, resolve; + + bool needs_flush; + + struct fd_ringbuffer *ring; + struct fd_ringmarker *draw_start, *draw_end; + + /* scissor can't really be changed mid-render.. we probably need + * to flush out all pending draws and then start a new tile pass + * w/ new stencil state.. + */ + struct pipe_scissor_state scissor; + + /* which state objects need to be re-emit'd: */ + enum { + FD_DIRTY_BLEND = (1 << 0), + FD_DIRTY_RASTERIZER = (1 << 1), + FD_DIRTY_ZSA = (1 << 2), + FD_DIRTY_FRAGTEX = (1 << 3), + FD_DIRTY_VERTTEX = (1 << 4), + FD_DIRTY_PROG = (1 << 5), + FD_DIRTY_VTX = (1 << 6), + FD_DIRTY_BLEND_COLOR = (1 << 7), + FD_DIRTY_STENCIL_REF = (1 << 8), + FD_DIRTY_SAMPLE_MASK = (1 << 9), + FD_DIRTY_FRAMEBUFFER = (1 << 10), + FD_DIRTY_STIPPLE = (1 << 12), + FD_DIRTY_VIEWPORT = (1 << 12), + FD_DIRTY_CONSTBUF = (1 << 13), + FD_DIRTY_VERTEXBUF = (1 << 14), + FD_DIRTY_INDEXBUF = (1 << 15), + FD_DIRTY_SCISSOR = (1 << 16), + } dirty; + + struct fd_blend_stateobj *blend; + struct fd_rasterizer_stateobj *rasterizer; + struct fd_zsa_stateobj *zsa; + + struct fd_texture_stateobj verttex, fragtex; + + struct fd_program_stateobj prog; + + struct fd_vertex_stateobj *vtx; + + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + struct fd_framebuffer_stateobj 
framebuffer; + struct pipe_poly_stipple stipple; + struct pipe_viewport_state viewport; + struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct fd_vertexbuf_stateobj vertexbuf; + struct pipe_index_buffer indexbuf; +}; + +static INLINE struct fd_context * +fd_context(struct pipe_context *pctx) +{ + return (struct fd_context *)pctx; +} + +struct pipe_context * fd_context_create(struct pipe_screen *pscreen, void *priv); + +void fd_context_render(struct pipe_context *pctx); + +#endif /* FREEDRENO_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c new file mode 100644 index 00000000000..e6374655f5b --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_fence.c @@ -0,0 +1,52 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "freedreno_fence.h" +#include "freedreno_util.h" + +boolean +fd_fence_wait(struct fd_fence *fence) +{ + DBG("TODO: "); + return false; +} + +boolean +fd_fence_signalled(struct fd_fence *fence) +{ + DBG("TODO: "); + return false; +} + +void +fd_fence_del(struct fd_fence *fence) +{ + +} + + diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h b/src/gallium/drivers/freedreno/freedreno_fence.h new file mode 100644 index 00000000000..7e8bee322dc --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_fence.h @@ -0,0 +1,65 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_FENCE_H_ +#define FREEDRENO_FENCE_H_ + +#include "util/u_inlines.h" +#include "util/u_double_list.h" + + +struct fd_fence { + int ref; +}; + +boolean fd_fence_wait(struct fd_fence *fence); +boolean fd_fence_signalled(struct fd_fence *fence); +void fd_fence_del(struct fd_fence *fence); + +static INLINE void +fd_fence_ref(struct fd_fence *fence, struct fd_fence **ref) +{ + if (fence) + ++fence->ref; + + if (*ref) { + if (--(*ref)->ref == 0) + fd_fence_del(*ref); + } + + *ref = fence; +} + +static INLINE struct fd_fence * +fd_fence(struct pipe_fence_handle *fence) +{ + return (struct fd_fence *)fence; +} + + +#endif /* FREEDRENO_FENCE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c new file mode 100644 index 00000000000..dae60c6d5d9 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -0,0 +1,491 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of 
the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_pack_color.h" + +#include "freedreno_gmem.h" +#include "freedreno_context.h" +#include "freedreno_state.h" +#include "freedreno_program.h" +#include "freedreno_resource.h" +#include "freedreno_zsa.h" +#include "freedreno_util.h" + +/* + * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer + * inside the GPU. All rendering happens to GMEM. Larger render targets + * are split into tiles that are small enough for the color (and depth and/or + * stencil, if enabled) buffers to fit within GMEM. Before rendering a tile, + * if there was not a clear invalidating the previous tile contents, we need + * to restore the previous tiles contents (system mem -> GMEM), and after all + * the draw calls, before moving to the next tile, we need to save the tile + * contents (GMEM -> system mem). + * + * The code in this file handles dealing with GMEM and tiling. + * + * The structure of the ringbuffer ends up being: + * + * +--<---<-- IB ---<---+---<---+---<---<---<--+ + * | | | | + * v ^ ^ ^ + * ------------------------------------------------------ + * | clear/draw cmds | Tile0 | Tile1 | .... 
| TileN | + * ------------------------------------------------------ + * ^ + * | + * address submitted in issueibcmds + * + * Where the per-tile section handles scissor setup, mem2gmem restore (if + * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem + * resolve. + */ + +/* transfer from gmem to system memory (ie. normal RAM) */ + +static void +emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base, + struct pipe_surface *psurf) +{ + struct fd_resource *rsc = fd_resource(psurf->texture); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO)); + OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(swap) | + RB_COLOR_INFO_COLOR_BASE(base / 1024) | + RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format))); + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL)); + OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ + OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */ + OUT_RING(ring, RB_COPY_DEST_INFO_FORMAT(fd_pipe2color(psurf->format)) | + RB_COPY_DEST_INFO_LINEAR | /* RB_COPY_DEST_INFO */ + RB_COPY_DEST_INFO_SWAP(swap) | + RB_COPY_DEST_INFO_WRITE_RED | + RB_COPY_DEST_INFO_WRITE_GREEN | + RB_COPY_DEST_INFO_WRITE_BLUE | + RB_COPY_DEST_INFO_WRITE_ALPHA); + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x0000000); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 3); /* NumIndices */ +} + +static void +emit_gmem2mem(struct fd_context *ctx, struct fd_ringbuffer *ring, + uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h) +{ + struct fd_framebuffer_stateobj *fb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &fb->base; + + fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) { + { .prsc = ctx->solid_vertexbuf, .size = 48 }, + }, 1); + + OUT_PKT3(ring, 
CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET)); + OUT_RING(ring, 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); + + fd_program_emit(ring, &ctx->solid_prog); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK)); + OUT_RING(ring, 0x0000ffff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL)); + OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL)); + OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */ + PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) | + PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */ + OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL)); + OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT | + PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_MODECONTROL)); + OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_OFFSET)); + OUT_RING(ring, RB_COPY_DEST_OFFSET_X(xoff) | RB_COPY_DEST_OFFSET_Y(yoff)); + + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf); + + if (ctx->resolve & FD_BUFFER_COLOR) + emit_gmem2mem_surf(ring, 1, 0, pfb->cbufs[0]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + 
OUT_RING(ring, CP_REG(REG_RB_MODECONTROL)); + OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); +} + +/* transfer from system memory to gmem */ + +static void +emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base, + struct pipe_surface *psurf) +{ + struct fd_resource *rsc = fd_resource(psurf->texture); + uint32_t swiz; + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO)); + OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(swap) | + RB_COLOR_INFO_COLOR_BASE(base / 1024) | + RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format))); + + swiz = fd_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA); + + /* emit fb as a texture: */ + OUT_PKT3(ring, CP_SET_CONSTANT, 7); + OUT_RING(ring, 0x00010000); + OUT_RING(ring, SQ_TEX0_CLAMP_X(SQ_TEX_WRAP) | + SQ_TEX0_CLAMP_Y(SQ_TEX_WRAP) | + SQ_TEX0_CLAMP_Z(SQ_TEX_WRAP) | + SQ_TEX0_PITCH(rsc->pitch)); + OUT_RELOC(ring, rsc->bo, 0, + fd_pipe2surface(psurf->format) | 0x800); + OUT_RING(ring, SQ_TEX2_WIDTH(psurf->width) | + SQ_TEX2_HEIGHT(psurf->height)); + OUT_RING(ring, 0x01000000 | // XXX + swiz | + SQ_TEX3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) | + SQ_TEX3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT)); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000200); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 3); /* NumIndices */ +} + +static void +emit_mem2gmem(struct fd_context *ctx, struct fd_ringbuffer *ring, + uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h) +{ + struct fd_framebuffer_stateobj *fb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &fb->base; + float x0, y0, x1, y1; + + fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) { + { .prsc = ctx->solid_vertexbuf, .size = 48, .offset = 0x30 }, + { .prsc = ctx->solid_vertexbuf, .size = 32, .offset = 0x60 }, + }, 2); + + /* 
write texture coordinates to vertexbuf: */ + x0 = ((float)xoff) / ((float)pfb->width); + x1 = ((float)xoff + bin_w) / ((float)pfb->width); + y0 = ((float)yoff) / ((float)pfb->height); + y1 = ((float)yoff + bin_h) / ((float)pfb->height); + OUT_PKT3(ring, CP_MEM_WRITE, 9); + OUT_RELOC(ring, fd_resource(ctx->solid_vertexbuf)->bo, 0x60, 0); + OUT_RING(ring, f2d(x0)); + OUT_RING(ring, f2d(y0)); + OUT_RING(ring, f2d(x1)); + OUT_RING(ring, f2d(y0)); + OUT_RING(ring, f2d(x0)); + OUT_RING(ring, f2d(y1)); + OUT_RING(ring, f2d(x1)); + OUT_RING(ring, f2d(y1)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET)); + OUT_RING(ring, 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); + + fd_program_emit(ring, &ctx->blit_prog); + + OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1); + OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL)); + OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL)); + OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | + PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) | + PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK)); + OUT_RING(ring, 0x0000ffff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL)); + OUT_RING(ring, RB_COLORCONTROL_ALPHA_FUNC(PIPE_FUNC_ALWAYS) | + RB_COLORCONTROL_BLEND_DISABLE | + RB_COLORCONTROL_ROP_CODE(12) | + RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) | + RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL)); + OUT_RING(ring, RB_BLENDCONTROL_COLOR_SRCBLEND(RB_BLEND_ONE) | + RB_BLENDCONTROL_COLOR_COMB_FCN(COMB_DST_PLUS_SRC) | + 
RB_BLENDCONTROL_COLOR_DESTBLEND(RB_BLEND_ZERO) | + RB_BLENDCONTROL_ALPHA_SRCBLEND(RB_BLEND_ONE) | + RB_BLENDCONTROL_ALPHA_COMB_FCN(COMB_DST_PLUS_SRC) | + RB_BLENDCONTROL_ALPHA_DESTBLEND(RB_BLEND_ZERO)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, PA_SC_WINDOW_OFFSET_DISABLE | + xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */ + OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, f2d((float)bin_w/2.0)); /* PA_CL_VPORT_XSCALE */ + OUT_RING(ring, f2d((float)bin_w/2.0)); /* PA_CL_VPORT_XOFFSET */ + OUT_RING(ring, f2d(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */ + OUT_RING(ring, f2d((float)bin_h/2.0)); /* PA_CL_VPORT_YOFFSET */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL)); + OUT_RING(ring, PA_CL_VTE_CNTL_VTX_XY_FMT | + PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this??? + PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL)); + OUT_RING(ring, 0x00000000); + + if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_mem2gmem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf); + + /* this is the restore path (the caller only gets here when + * ctx->restore is set), so test the restore mask for color too: + */ + if (ctx->restore & FD_BUFFER_COLOR) + emit_mem2gmem_surf(ring, 1, 0, pfb->cbufs[0]); + + /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */ +} + +/* Walk every bin of the framebuffer and emit the per-tile cmds (scissor + * setup, optional mem2gmem, an IB to the recorded draw cmds, gmem2mem). + */ +void +fd_gmem_render_tiles(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_framebuffer_stateobj *fb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &fb->base; + struct fd_ringbuffer *ring; + uint32_t i, yoff = 0; + uint32_t timestamp; + ring = ctx->ring; + + DBG("rendering %dx%d tiles (%s/%s)", fb->nbins_x, fb->nbins_y, + util_format_name(pfb->cbufs[0]->format), + pfb->zsbuf ? 
util_format_name(pfb->zsbuf->format) : "none"); + + /* mark the end of the clear/draw cmds before emitting per-tile cmds: */ + fd_ringmarker_mark(ctx->draw_end); + + for (i = 0; i < fb->nbins_y; i++) { + uint32_t j, xoff = 0; + uint32_t bin_h = fb->bin_h; + + /* clip bin height: */ + bin_h = min(bin_h, pfb->height - yoff); + + for (j = 0; j < fb->nbins_x; j++) { + uint32_t bin_w = fb->bin_w; + + /* clip bin width: */ + bin_w = min(bin_w, pfb->width - xoff); + + DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", + bin_h, yoff, bin_w, xoff); + + fd_emit_framebuffer_state(ring, &ctx->framebuffer); + + /* setup screen scissor for current tile (same for mem2gmem): */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL)); + OUT_RING(ring, xy2d(0,0)); /* PA_SC_SCREEN_SCISSOR_TL */ + OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_SCREEN_SCISSOR_BR */ + + if (ctx->restore) + emit_mem2gmem(ctx, ring, xoff, yoff, bin_w, bin_h); + + /* setup window scissor and offset for current tile (different + * from mem2gmem): + */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET)); + OUT_RING(ring, PA_SC_WINDOW_OFFSET_X(-xoff) | + PA_SC_WINDOW_OFFSET_Y(-yoff));/* PA_SC_WINDOW_OFFSET */ + + /* emit IB to drawcmds: */ + OUT_IB (ring, ctx->draw_start, ctx->draw_end); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET)); + OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */ + + /* emit gmem2mem to transfer tile back to system memory: */ + emit_gmem2mem(ctx, ring, xoff, yoff, bin_w, bin_h); + + xoff += bin_w; + } + + yoff += bin_h; + } + + /* GPU executes starting from tile cmds, which IB back to draw cmds: */ + fd_ringmarker_flush(ctx->draw_end); + + /* mark start for next draw cmds: */ + fd_ringmarker_mark(ctx->draw_start); + + /* update timestamps on render targets: */ + fd_pipe_timestamp(ctx->screen->pipe, &timestamp); + fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; + if 
(pfb->zsbuf) + fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; + + /* Note that because the per-tile setup and mem2gmem/gmem2mem are emitted + * after the draw/clear calls, but executed before, we need to preemptively + * flag some state as dirty before the first draw/clear call. + * + * TODO maybe we need to mark all state as dirty to not worry about state + * being clobbered by other contexts? + */ + ctx->dirty |= FD_DIRTY_ZSA | + FD_DIRTY_RASTERIZER | + FD_DIRTY_FRAMEBUFFER | + FD_DIRTY_SAMPLE_MASK | + FD_DIRTY_VIEWPORT | + FD_DIRTY_CONSTBUF | + FD_DIRTY_PROG | + FD_DIRTY_SCISSOR | + /* probably only needed if we need to mem2gmem on the next + * draw.. but not sure if there is a good way to know? + */ + FD_DIRTY_VERTTEX | + FD_DIRTY_FRAGTEX | + FD_DIRTY_BLEND; +} + +void +fd_gmem_calculate_tiles(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_framebuffer_stateobj *fb = &ctx->framebuffer; + struct pipe_framebuffer_state *pfb = &fb->base; + uint32_t nbins_x = 1, nbins_y = 1; + uint32_t bin_w, bin_h; + uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format); + uint32_t gmem_size = ctx->screen->gmemsize_bytes; + uint32_t max_width = 992; + +// TODO we probably could optimize this a bit if we know that +// Z or stencil is not enabled for any of the draw calls.. 
+// if (fd_stencil_enabled(ctx->zsa) || fd_depth_enabled(ctx->zsa)) { + gmem_size /= 2; + max_width = 256; +// } + + bin_w = ALIGN(pfb->width, 32); + bin_h = ALIGN(pfb->height, 32); + + /* first, find a bin width that satisfies the maximum width + * restrictions: + */ + while (bin_w > max_width) { + nbins_x++; + bin_w = ALIGN(pfb->width / nbins_x, 32); + } + + /* then find a bin height that satisfies the memory constraints: + */ + while ((bin_w * bin_h * cpp) > gmem_size) { + nbins_y++; + bin_h = ALIGN(pfb->height / nbins_y, 32); + } + + if ((nbins_x > 1) || (nbins_y > 1)) { + fb->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE; + } else { + fb->pa_su_sc_mode_cntl &= ~PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE; + } + + DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h); + +//if we use hw binning, tile sizes (in multiple of 32) need to +//fit in 5 bits.. for now don't care because we aren't using +//that: +// assert(!(bin_h/32 & ~0x1f)); +// assert(!(bin_w/32 & ~0x1f)); + + fb->nbins_x = nbins_x; + fb->nbins_y = nbins_y; + fb->bin_w = bin_w; + fb->bin_h = bin_h; + +} diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h new file mode 100644 index 00000000000..7b46f6b5e4e --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -0,0 +1,37 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_GMEM_H_ +#define FREEDRENO_GMEM_H_ + +#include "pipe/p_context.h" + +void fd_gmem_render_tiles(struct pipe_context *pctx); +void fd_gmem_calculate_tiles(struct pipe_context *pctx); + +#endif /* FREEDRENO_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_pm4.h b/src/gallium/drivers/freedreno/freedreno_pm4.h new file mode 100644 index 00000000000..a536f9c2b4e --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_pm4.h @@ -0,0 +1,86 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_PM4_H_ +#define FREEDRENO_PM4_H_ + +#define CP_TYPE0_PKT (0 << 30) +#define CP_TYPE1_PKT (1 << 30) +#define CP_TYPE2_PKT (2 << 30) +#define CP_TYPE3_PKT (3 << 30) + + +#define CP_ME_INIT 0x48 +#define CP_NOP 0x10 +#define CP_INDIRECT_BUFFER 0x3f +#define CP_INDIRECT_BUFFER_PFD 0x37 +#define CP_WAIT_FOR_IDLE 0x26 +#define CP_WAIT_REG_MEM 0x3c +#define CP_WAIT_REG_EQ 0x52 +#define CP_WAT_REG_GTE 0x53 +#define CP_WAIT_UNTIL_READ 0x5c +#define CP_WAIT_IB_PFD_COMPLETE 0x5d +#define CP_REG_RMW 0x21 +#define CP_REG_TO_MEM 0x3e +#define CP_MEM_WRITE 0x3d +#define CP_MEM_WRITE_CNTR 0x4f +#define CP_COND_EXEC 0x44 +#define CP_COND_WRITE 0x45 +#define CP_EVENT_WRITE 0x46 +#define CP_EVENT_WRITE_SHD 0x58 +#define CP_EVENT_WRITE_CFL 0x59 +#define CP_EVENT_WRITE_ZPD 0x5b +#define CP_DRAW_INDX 0x22 +#define CP_DRAW_INDX_2 0x36 +#define CP_DRAW_INDX_BIN 0x34 +#define CP_DRAW_INDX_2_BIN 0x35 +#define CP_VIZ_QUERY 0x23 +#define CP_SET_STATE 0x25 +#define CP_SET_CONSTANT 0x2d +#define CP_IM_LOAD 0x27 +#define CP_IM_LOAD_IMMEDIATE 0x2b +#define CP_LOAD_CONSTANT_CONTEXT 0x2e +#define CP_INVALIDATE_STATE 0x3b +#define CP_SET_SHADER_BASES 0x4a +#define CP_SET_BIN_MASK 0x50 +#define CP_SET_BIN_SELECT 0x51 +#define CP_CONTEXT_UPDATE 0x5e +#define CP_INTERRUPT 0x40 +#define CP_IM_STORE 0x2c +#define CP_SET_BIN_BASE_OFFSET 0x4b /* for a20x */ +#define CP_SET_DRAW_INIT_FLAGS 0x4b /* for a22x */ +#define CP_SET_PROTECTED_MODE 0x5f 
+#define CP_LOAD_STATE 0x30 +#define CP_COND_INDIRECT_BUFFER_PFE 0x3a +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 + + +#define CP_REG(reg) ((0x4 << 16) | ((reg) - 0x2000)) + + +#endif /* FREEDRENO_PM4_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c new file mode 100644 index 00000000000..b7fec6d7dc2 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -0,0 +1,506 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "freedreno_program.h" +#include "freedreno_compiler.h" +#include "freedreno_vbo.h" +#include "freedreno_texture.h" +#include "freedreno_util.h" + +static struct fd_shader_stateobj * +create_shader(enum shader_t type) +{ + struct fd_shader_stateobj *so = CALLOC_STRUCT(fd_shader_stateobj); + if (!so) + return NULL; + so->type = type; + return so; +} + +/* Release a shader stateobj together with its ir, tgsi tokens and + * assembled binary. Passing NULL is a no-op. + */ +static void +delete_shader(struct fd_shader_stateobj *so) +{ + /* the built-in programs are stored without a NULL check, so the + * teardown path can hand us NULL: + */ + if (!so) + return; + ir_shader_destroy(so->ir); + FREE(so->tokens); + /* bin comes from ir_shader_assemble() and is released with plain + * free() in assemble(), so do the same here instead of leaking it: + */ + free(so->bin); + FREE(so); +} + +static struct fd_shader_stateobj * +assemble(struct fd_shader_stateobj *so) +{ + free(so->bin); + so->bin = ir_shader_assemble(so->ir, &so->info); + if (!so->bin) + goto fail; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("disassemble: type=%d", so->type); + disasm(so->bin, so->info.sizedwords, 0, so->type); + } + + return so; + +fail: + debug_error("assemble failed!"); + delete_shader(so); + return NULL; +} + +static struct fd_shader_stateobj * +compile(struct fd_program_stateobj *prog, struct fd_shader_stateobj *so) +{ + int ret; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("dump tgsi: type=%d", so->type); + tgsi_dump(so->tokens, 0); + } + + ret = fd_compile_shader(prog, so); + if (ret) + goto fail; + + /* NOTE: we don't assemble yet because for VS we don't know the + * type information for vertex fetch yet.. so those need to be + * patched up later before assembling. 
+ */ + + so->info.sizedwords = 0; + + return so; + +fail: + debug_error("compile failed!"); + delete_shader(so); + return NULL; +} + +static void +emit(struct fd_ringbuffer *ring, struct fd_shader_stateobj *so) +{ + unsigned i; + + if (so->info.sizedwords == 0) + assemble(so); + + OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords); + OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1); + OUT_RING(ring, so->info.sizedwords); + for (i = 0; i < so->info.sizedwords; i++) + OUT_RING(ring, so->bin[i]); +} + +static void * +fd_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT); + if (!so) + return NULL; + so->tokens = tgsi_dup_tokens(cso->tokens); + return so; +} + +static void +fd_fp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd_shader_stateobj *so = hwcso; + delete_shader(so); +} + +static void +fd_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->prog.fp = hwcso; + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + ctx->dirty |= FD_DIRTY_PROG; +} + +static void * +fd_vp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX); + if (!so) + return NULL; + so->tokens = tgsi_dup_tokens(cso->tokens); + return so; +} + +static void +fd_vp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd_shader_stateobj *so = hwcso; + delete_shader(so); +} + +static void +fd_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->prog.vp = hwcso; + ctx->prog.dirty |= FD_SHADER_DIRTY_VP; + ctx->dirty |= FD_DIRTY_PROG; +} + +static void +patch_vtx_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so, + struct fd_vertex_stateobj *vtx) +{ + unsigned i; + + assert(so->num_vfetch_instrs == vtx->num_elements); + + /* update vtx fetch instructions: */ + 
for (i = 0; i < so->num_vfetch_instrs; i++) { + struct ir_instruction *instr = so->vfetch_instrs[i]; + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &ctx->vertexbuf.vb[elem->vertex_buffer_index]; + enum pipe_format format = elem->src_format; + const struct util_format_description *desc = + util_format_description(format); + unsigned j; + + /* Find the first non-VOID channel. */ + for (j = 0; j < 4; j++) + if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID) + break; + + /* CI/CIS can probably be set in compiler instead: */ + instr->fetch.const_idx = 20 + (i / 3); + instr->fetch.const_idx_sel = i % 3; + + instr->fetch.fmt = fd_pipe2surface(format); + instr->fetch.is_normalized = desc->channel[j].normalized; + instr->fetch.is_signed = + desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED; + instr->fetch.stride = vb->stride ? : 1; + instr->fetch.offset = elem->src_offset; + + for (j = 0; j < 4; j++) + instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]]; + + assert(instr->fetch.fmt != FMT_INVALID); + + DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, " + "stride=%d, offset=%d", + i, util_format_name(format), + instr->fetch.fmt, + instr->fetch.const_idx, + instr->fetch.const_idx_sel, + elem->instance_divisor, + instr->regs[0]->swizzle, + instr->fetch.stride, + instr->fetch.offset); + } + + /* trigger re-assemble: */ + so->info.sizedwords = 0; +} + +static void +patch_tex_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so, + struct fd_texture_stateobj *tex) +{ + unsigned i; + + /* update tex fetch instructions: */ + for (i = 0; i < so->num_tfetch_instrs; i++) { + struct ir_instruction *instr = so->tfetch_instrs[i].instr; + unsigned samp_id = so->tfetch_instrs[i].samp_id; + unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id); + + if (const_idx != instr->fetch.const_idx) { + instr->fetch.const_idx = const_idx; + /* trigger re-assemble: */ + so->info.sizedwords = 0; + } + } +} + +void 
+fd_program_validate(struct fd_context *ctx) +{ + struct fd_program_stateobj *prog = &ctx->prog; + + /* if vertex or frag shader is dirty, we may need to recompile. Compile + * frag shader first, as that assigns the register slots for exports + * from the vertex shader. And therefore if frag shader has changed we + * need to recompile both vert and frag shader. + */ + if (prog->dirty & FD_SHADER_DIRTY_FP) + compile(prog, prog->fp); + + if (prog->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP)) + compile(prog, prog->vp); + + if (prog->dirty) + ctx->dirty |= FD_DIRTY_PROG; + + prog->dirty = 0; + + /* if necessary, fix up vertex fetch instructions: */ + if (ctx->dirty & (FD_DIRTY_VTX | FD_DIRTY_VERTEXBUF | FD_DIRTY_PROG)) + patch_vtx_fetches(ctx, prog->vp, ctx->vtx); + + /* if necessary, fix up texture fetch instructions: */ + if (ctx->dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) { + patch_tex_fetches(ctx, prog->vp, &ctx->verttex); + patch_tex_fetches(ctx, prog->fp, &ctx->fragtex); + } +} + +void +fd_program_emit(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog) +{ + struct ir_shader_info *vsi = &prog->vp->info; + struct ir_shader_info *fsi = &prog->fp->info; + uint8_t vs_gprs, fs_gprs, vs_export; + + emit(ring, prog->vp); + emit(ring, prog->fp); + + vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg; + fs_gprs = (fsi->max_reg < 0) ? 
0x80 : fsi->max_reg; + vs_export = max(1, prog->num_exports) - 1; + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_SQ_PROGRAM_CNTL)); + OUT_RING(ring, SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) | + SQ_PROGRAM_CNTL_VS_RESOURCE | + SQ_PROGRAM_CNTL_PS_RESOURCE | + SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | + SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | + SQ_PROGRAM_CNTL_VS_REGS(vs_gprs)); +} + +/* Creates shader: + * EXEC ADDR(0x2) CNT(0x1) + * (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER) + * ALLOC PARAM/PIXEL SIZE(0x0) + * EXEC_END ADDR(0x3) CNT(0x1) + * ALU: MAXv export0 = R0, R0 ; gl_FragColor + * NOP + */ +static struct fd_shader_stateobj * +create_blit_fp(void) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT); + struct ir_cf *cf; + struct ir_instruction *instr; + + if (!so) + return NULL; + + so->ir = ir_shader_create(); + + cf = ir_cf_create(so->ir, EXEC); + + instr = ir_instr_create_tex_fetch(cf, 0); + ir_reg_create(instr, 0, "xyzw", 0); + ir_reg_create(instr, 0, "xyx", 0); + instr->sync = true; + + cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir_cf_create(so->ir, EXEC_END); + + instr = ir_instr_create_alu(cf, MAXv, ~0); + ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); + ir_reg_create(instr, 0, NULL, 0); + ir_reg_create(instr, 0, NULL, 0); + + return assemble(so); +} + +/* Creates shader: +* EXEC ADDR(0x3) CNT(0x2) +* FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1) +* FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0) +* ALLOC POSITION SIZE(0x0) +* EXEC ADDR(0x5) CNT(0x1) +* ALU: MAXv export62 = R2, R2 ; gl_Position +* ALLOC PARAM/PIXEL SIZE(0x0) +* EXEC_END ADDR(0x6) CNT(0x1) +* ALU: MAXv export0 = R1, R1 +* NOP + */ +static struct fd_shader_stateobj * +create_blit_vp(void) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX); + struct ir_cf *cf; + struct ir_instruction *instr; + + if (!so) + return NULL; + + 
so->ir = ir_shader_create(); + + cf = ir_cf_create(so->ir, EXEC); + + instr = ir_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8); + instr->fetch.is_normalized = true; + ir_reg_create(instr, 1, "xy01", 0); + ir_reg_create(instr, 0, "x", 0); + + instr = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); + instr->fetch.is_normalized = true; + ir_reg_create(instr, 2, "xyz1", 0); + ir_reg_create(instr, 0, "x", 0); + + cf = ir_cf_create_alloc(so->ir, SQ_POSITION, 0); + cf = ir_cf_create(so->ir, EXEC); + + instr = ir_instr_create_alu(cf, MAXv, ~0); + ir_reg_create(instr, 62, NULL, IR_REG_EXPORT); + ir_reg_create(instr, 2, NULL, 0); + ir_reg_create(instr, 2, NULL, 0); + + cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir_cf_create(so->ir, EXEC_END); + + instr = ir_instr_create_alu(cf, MAXv, ~0); + ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); + ir_reg_create(instr, 1, NULL, 0); + ir_reg_create(instr, 1, NULL, 0); + + + return assemble(so); + +} + +/* Creates shader: + * ALLOC PARAM/PIXEL SIZE(0x0) + * EXEC_END ADDR(0x1) CNT(0x1) + * ALU: MAXv export0 = C0, C0 ; gl_FragColor + */ +static struct fd_shader_stateobj * +create_solid_fp(void) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT); + struct ir_cf *cf; + struct ir_instruction *instr; + + if (!so) + return NULL; + + so->ir = ir_shader_create(); + + cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir_cf_create(so->ir, EXEC_END); + + instr = ir_instr_create_alu(cf, MAXv, ~0); + ir_reg_create(instr, 0, NULL, IR_REG_EXPORT); + ir_reg_create(instr, 0, NULL, IR_REG_CONST); + ir_reg_create(instr, 0, NULL, IR_REG_CONST); + + return assemble(so); +} + +/* Creates shader: + * EXEC ADDR(0x3) CNT(0x1) + * (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT + * UNSIGNED STRIDE(12) CONST(26, 0) + * ALLOC POSITION SIZE(0x0) + * EXEC ADDR(0x4) CNT(0x1) + * ALU: MAXv export62 = R1, R1 ; gl_Position + * ALLOC PARAM/PIXEL SIZE(0x0) + * EXEC_END ADDR(0x5) 
CNT(0x0) + */ +static struct fd_shader_stateobj * +create_solid_vp(void) +{ + struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX); + struct ir_cf *cf; + struct ir_instruction *instr; + + if (!so) + return NULL; + + so->ir = ir_shader_create(); + + cf = ir_cf_create(so->ir, EXEC); + + instr = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12); + ir_reg_create(instr, 1, "xyz1", 0); + ir_reg_create(instr, 0, "x", 0); + + cf = ir_cf_create_alloc(so->ir, SQ_POSITION, 0); + cf = ir_cf_create(so->ir, EXEC); + + instr = ir_instr_create_alu(cf, MAXv, ~0); + ir_reg_create(instr, 62, NULL, IR_REG_EXPORT); + ir_reg_create(instr, 1, NULL, 0); + ir_reg_create(instr, 1, NULL, 0); + + cf = ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0); + cf = ir_cf_create(so->ir, EXEC_END); + + return assemble(so); +} + +void +fd_prog_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + pctx->create_fs_state = fd_fp_state_create; + pctx->bind_fs_state = fd_fp_state_bind; + pctx->delete_fs_state = fd_fp_state_delete; + + pctx->create_vs_state = fd_vp_state_create; + pctx->bind_vs_state = fd_vp_state_bind; + pctx->delete_vs_state = fd_vp_state_delete; + + ctx->solid_prog.fp = create_solid_fp(); + ctx->solid_prog.vp = create_solid_vp(); + ctx->blit_prog.fp = create_blit_fp(); + ctx->blit_prog.vp = create_blit_vp(); +} + +void +fd_prog_fini(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + delete_shader(ctx->solid_prog.vp); + delete_shader(ctx->solid_prog.fp); + delete_shader(ctx->blit_prog.vp); + delete_shader(ctx->blit_prog.fp); +} diff --git a/src/gallium/drivers/freedreno/freedreno_program.h b/src/gallium/drivers/freedreno/freedreno_program.h new file mode 100644 index 00000000000..e73cf1bbb97 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_program.h @@ -0,0 +1,82 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email 
protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_PROGRAM_H_ +#define FREEDRENO_PROGRAM_H_ + +#include "pipe/p_context.h" + +#include "freedreno_context.h" + +#include "ir.h" +#include "disasm.h" + +struct fd_shader_stateobj { + enum shader_t type; + + uint32_t *bin; + + struct tgsi_token *tokens; + + /* note that we defer compiling shader until we know both vs and ps.. 
+ * and if one changes, we potentially need to recompile in order to + * get varying linkages correct: + */ + struct ir_shader_info info; + struct ir_shader *ir; + + /* for vertex shaders, the fetch instructions which need to be + * patched up before assembly: + */ + unsigned num_vfetch_instrs; + struct ir_instruction *vfetch_instrs[64]; + + /* for all shaders, any tex fetch instructions which need to be + * patched before assembly: + */ + unsigned num_tfetch_instrs; + struct { + unsigned samp_id; + struct ir_instruction *instr; + } tfetch_instrs[64]; + + unsigned first_immediate; /* const reg # of first immediate */ + unsigned num_immediates; + struct { + uint32_t val[4]; + } immediates[64]; +}; + +void fd_program_emit(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog); +void fd_program_validate(struct fd_context *ctx); + +void fd_prog_init(struct pipe_context *pctx); +void fd_prog_fini(struct pipe_context *pctx); + +#endif /* FREEDRENO_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.c b/src/gallium/drivers/freedreno/freedreno_rasterizer.c new file mode 100644 index 00000000000..2d69133830a --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_rasterizer.c @@ -0,0 +1,151 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of 
the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "freedreno_rasterizer.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + + +static enum pa_su_sc_draw +polygon_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_POINT: + return DRAW_POINTS; + case PIPE_POLYGON_MODE_LINE: + return DRAW_LINES; + case PIPE_POLYGON_MODE_FILL: + return DRAW_TRIANGLES; + default: + DBG("invalid polygon mode: %u", mode); + return 0; + } +} + +static void * +fd_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct fd_rasterizer_stateobj *so; + float psize_min, psize_max; + + so = CALLOC_STRUCT(fd_rasterizer_stateobj); + if (!so) + return NULL; + + if (cso->point_size_per_vertex) { + psize_min = util_get_min_point_size(cso); + psize_max = 8192; + } else { + /* Force the point size to be as if the vertex output was disabled. */ + psize_min = cso->point_size; + psize_max = cso->point_size; + } + + so->base = *cso; + + so->pa_sc_line_stipple = cso->line_stipple_enable ? + PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) | + PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor) : 0; + + so->pa_cl_clip_cntl = 0; // TODO + + so->pa_su_vtx_cntl = + PA_SU_VTX_CNTL_PIX_CENTER(cso->gl_rasterization_rules ? 
PIXCENTER_OGL : PIXCENTER_D3D) | + PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH); + + so->pa_su_point_size = + PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) | + PA_SU_POINT_SIZE_WIDTH(cso->point_size/2); + + so->pa_su_point_minmax = + PA_SU_POINT_MINMAX_MIN_SIZE(psize_min/2) | + PA_SU_POINT_MINMAX_MAX_SIZE(psize_max/2); + + so->pa_su_line_cntl = + PA_SU_LINE_CNTL_WIDTH(cso->line_width/2); + + so->pa_su_sc_mode_cntl = + PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE | + PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(polygon_mode(cso->fill_front)) | + PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(polygon_mode(cso->fill_back)); + + if (cso->cull_face & PIPE_FACE_FRONT) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_FRONT; + if (cso->cull_face & PIPE_FACE_BACK) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_BACK; + if (!cso->flatshade_first) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST; + if (!cso->front_ccw) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_FACE; + if (cso->line_stipple_enable) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE; + if (cso->multisample) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_MSAA_ENABLE; + + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE); + else + so->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED); + + if (cso->offset_tri) + so->pa_su_sc_mode_cntl |= + PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE | + PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE | + PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE; + + return so; +} + +static void +fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->rasterizer = hwcso; + ctx->dirty |= FD_DIRTY_RASTERIZER; +} + +static void +fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +void +fd_rasterizer_init(struct pipe_context *pctx) +{ + 
pctx->create_rasterizer_state = fd_rasterizer_state_create; + pctx->bind_rasterizer_state = fd_rasterizer_state_bind; + pctx->delete_rasterizer_state = fd_rasterizer_state_delete; +} diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.h b/src/gallium/drivers/freedreno/freedreno_rasterizer.h new file mode 100644 index 00000000000..519a05edfea --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_rasterizer.h @@ -0,0 +1,48 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/**
 * Driver-side rasterizer state object.  It is allocated and filled in by
 * fd_rasterizer_state_create() from a gallium rasterizer CSO; the pa_*
 * members hold values pre-computed from that CSO (presumably the contents
 * of the like-named hardware registers -- confirm against the emit code).
 */
struct fd_rasterizer_stateobj {
	/* copy of the gallium CSO this object was created from */
	struct pipe_rasterizer_state base;
	/* line stipple pattern and repeat count; 0 when stippling is disabled */
	uint32_t pa_sc_line_stipple;
	/* currently always written as 0 (marked TODO at creation) */
	uint32_t pa_cl_clip_cntl;
	/* pixel center convention (OGL vs D3D) and quantization mode */
	uint32_t pa_su_vtx_cntl;
	/* point height/width, each built from point_size / 2 */
	uint32_t pa_su_point_size;
	/* min/max point size, each built from the clamp value / 2 */
	uint32_t pa_su_point_minmax;
	/* line width, built from line_width / 2 */
	uint32_t pa_su_line_cntl;
	/* culling, front-face, polygon mode, offset, stipple and MSAA bits */
	uint32_t pa_su_sc_mode_cntl;
};
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "util/u_string.h" + +#include "freedreno_resource.h" +#include "freedreno_screen.h" +#include "freedreno_surface.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +static void * +fd_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_resource *rsc = fd_resource(prsc); + struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool); + enum pipe_format format = prsc->format; + char *buf; + + if (!ptrans) + return NULL; + + ptrans->resource = prsc; + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + ptrans->stride = rsc->pitch * rsc->cpp; + ptrans->layer_stride = ptrans->stride; + + buf = fd_bo_map(rsc->bo); + + *pptrans = ptrans; + + return buf + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp; +} + +static void fd_resource_transfer_flush_region(struct pipe_context *pctx, + struct pipe_transfer *ptrans, + const struct pipe_box *box) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_resource *rsc = fd_resource(ptrans->resource); + + if (rsc->dirty) + fd_context_render(pctx); + + if (rsc->timestamp) { + fd_pipe_wait(ctx->screen->pipe, rsc->timestamp); + rsc->timestamp = 0; + } +} + +static void +fd_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct fd_context *ctx = fd_context(pctx); + 
util_slab_free(&ctx->transfer_pool, ptrans); +} + +static void +fd_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct fd_resource *rsc = fd_resource(prsc); + fd_bo_del(rsc->bo); + FREE(rsc); +} + +static boolean +fd_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *prsc, + struct winsys_handle *handle) +{ + struct fd_resource *rsc = fd_resource(prsc); + + return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->pitch, handle); +} + + +const struct u_resource_vtbl fd_resource_vtbl = { + .resource_get_handle = fd_resource_get_handle, + .resource_destroy = fd_resource_destroy, + .transfer_map = fd_resource_transfer_map, + .transfer_flush_region = fd_resource_transfer_flush_region, + .transfer_unmap = fd_resource_transfer_unmap, + .transfer_inline_write = u_default_transfer_inline_write, +}; + +/** + * Create a new texture object, using the given template info. + */ +static struct pipe_resource * +fd_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); + struct pipe_resource *prsc = &rsc->base.b; + uint32_t flags, size; + + DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, " + "nr_samples=%u, usage=%u, bind=%x, flags=%x", + tmpl->target, util_format_name(tmpl->format), + tmpl->width0, tmpl->height0, tmpl->depth0, + tmpl->array_size, tmpl->last_level, tmpl->nr_samples, + tmpl->usage, tmpl->bind, tmpl->flags); + + if (!rsc) + return NULL; + + *prsc = *tmpl; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + rsc->base.vtbl = &fd_resource_vtbl; + rsc->pitch = ALIGN(tmpl->width0, 32); + rsc->cpp = util_format_get_blocksize(tmpl->format); + + size = rsc->pitch * tmpl->height0 * rsc->cpp; + flags = DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ + + rsc->bo = fd_bo_new(screen->dev, size, flags); + + return prsc; +} + +/** + * Create a texture from a 
winsys_handle. The handle is often created in + * another process by first creating a pipe texture and then calling + * resource_get_handle. + */ +static struct pipe_resource * +fd_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *handle) +{ + struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); + struct pipe_resource *prsc = &rsc->base.b; + + DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, " + "nr_samples=%u, usage=%u, bind=%x, flags=%x", + tmpl->target, util_format_name(tmpl->format), + tmpl->width0, tmpl->height0, tmpl->depth0, + tmpl->array_size, tmpl->last_level, tmpl->nr_samples, + tmpl->usage, tmpl->bind, tmpl->flags); + + if (!rsc) + return NULL; + + *prsc = *tmpl; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &rsc->pitch); + + rsc->base.vtbl = &fd_resource_vtbl; + rsc->pitch = ALIGN(tmpl->width0, 32); + + return prsc; +} + +/** + * Copy a block of pixels from one resource to another. + * The resource must be of the same format. + * Resources with nr_samples > 1 are not allowed. + */ +static void +fd_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + DBG("TODO: "); + // TODO +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
+ */ +static void +fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + DBG("TODO: "); + // TODO +} + +void +fd_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create = fd_resource_create; + pscreen->resource_from_handle = fd_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} + +void +fd_resource_context_init(struct pipe_context *pctx) +{ + pctx->transfer_map = u_transfer_map_vtbl; + pctx->transfer_flush_region = u_transfer_flush_region_vtbl; + pctx->transfer_unmap = u_transfer_unmap_vtbl; + pctx->transfer_inline_write = u_transfer_inline_write_vtbl; + pctx->create_surface = fd_create_surface; + pctx->surface_destroy = fd_surface_destroy; + pctx->resource_copy_region = fd_resource_copy_region; + pctx->blit = fd_blit; +} diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h new file mode 100644 index 00000000000..575a143309f --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -0,0 +1,51 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
/**
 * Driver resource (texture/buffer) object.  A pipe_resource pointer handed
 * out by this driver is cast back to this struct by fd_resource().
 */
struct fd_resource {
	/* gallium base object (pipe_resource 'b' plus the u_resource vtbl) */
	struct u_resource base;
	/* backing buffer object; allocated or imported at creation time and
	 * released with fd_bo_del() when the resource is destroyed
	 */
	struct fd_bo *bo;
	/* pitch: template width aligned up to 32; cpp: format block size in
	 * bytes.  The transfer stride in bytes is pitch * cpp.
	 */
	uint32_t pitch, cpp;
	/* when non-zero, transfer_flush_region waits on it via fd_pipe_wait()
	 * and then resets it to 0
	 */
	uint32_t timestamp;
	/* when set, transfer_flush_region first calls fd_context_render() */
	bool dirty;
};
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "util/u_string.h" + +#include "os/os_time.h" + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> + +#include "freedreno_context.h" +#include "freedreno_screen.h" +#include "freedreno_resource.h" +#include "freedreno_fence.h" +#include "freedreno_util.h" + +/* XXX this should go away */ +#include "state_tracker/drm_driver.h" + +int fd_mesa_debug = 0; + +static const char * +fd_screen_get_name(struct pipe_screen *pscreen) +{ + static char buffer[128]; + util_snprintf(buffer, sizeof(buffer), "FD%03d", + fd_screen(pscreen)->device_id); + return buffer; +} + +static const char * +fd_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "freedreno"; +} + +static uint64_t +fd_screen_get_timestamp(struct pipe_screen *pscreen) +{ + int64_t cpu_time = os_time_get() * 1000; + return cpu_time + fd_screen(pscreen)->cpu_gpu_time_delta; +} + +static void +fd_screen_fence_ref(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) +{ + fd_fence_ref(fd_fence(pfence), (struct fd_fence **)ptr); +} + +static boolean +fd_screen_fence_signalled(struct pipe_screen *screen, + struct 
pipe_fence_handle *pfence) +{ + return fd_fence_signalled(fd_fence(pfence)); +} + +static boolean +fd_screen_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *pfence, + uint64_t timeout) +{ + return fd_fence_wait(fd_fence(pfence)); +} + +static void +fd_screen_destroy(struct pipe_screen *pscreen) +{ + // TODO + DBG("TODO"); +} + +/* +EGL Version 1.4 +EGL Vendor Qualcomm, Inc +EGL Extensions EGL_QUALCOMM_shared_image EGL_KHR_image EGL_AMD_create_image EGL_KHR_lock_surface EGL_KHR_lock_surface2 EGL_KHR_fence_sync EGL_IMG_context_priorityEGL_ANDROID_image_native_buffer +GL extensions: GL_AMD_compressed_ATC_texture GL_AMD_performance_monitor GL_AMD_program_binary_Z400 GL_EXT_texture_filter_anisotropic GL_EXT_texture_format_BGRA8888 GL_EXT_texture_type_2_10_10_10_REV GL_NV_fence GL_OES_compressed_ETC1_RGB8_texture GL_OES_depth_texture GL_OES_depth24 GL_OES_EGL_image GL_OES_EGL_image_external GL_OES_element_index_uint GL_OES_fbo_render_mipmap GL_OES_fragment_precision_high GL_OES_get_program_binary GL_OES_packed_depth_stencil GL_OES_rgb8_rgba8 GL_OES_standard_derivatives GL_OES_texture_3D GL_OES_texture_float GL_OES_texture_half_float GL_OES_texture_half_float_linear GL_OES_texture_npot GL_OES_vertex_half_float GL_OES_vertex_type_10_10_10_2 GL_QCOM_alpha_test GL_QCOM_binning_control GL_QCOM_driver_control GL_QCOM_perfmon_global_mode GL_QCOM_extended_get GL_QCOM_extended_get2 GL_QCOM_tiled_rendering GL_QCOM_writeonly_rendering GL_AMD_compressed_3DC_texture +GL_MAX_3D_TEXTURE_SIZE_OES: 1024 0 0 0 +no GL_MAX_SAMPLES_ANGLE: GL_INVALID_ENUM +no GL_MAX_SAMPLES_APPLE: GL_INVALID_ENUM +GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT: 16 0 0 0 +no GL_MAX_SAMPLES_IMG: GL_INVALID_ENUM +GL_MAX_TEXTURE_SIZE: 4096 0 0 0 +GL_MAX_VIEWPORT_DIMS: 4096 4096 0 0 +GL_MAX_VERTEX_ATTRIBS: 16 0 0 0 +GL_MAX_VERTEX_UNIFORM_VECTORS: 251 0 0 0 +GL_MAX_VARYING_VECTORS: 8 0 0 0 +GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS: 20 0 0 0 +GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS: 4 0 0 0 +GL_MAX_TEXTURE_IMAGE_UNITS: 
16 0 0 0 +GL_MAX_FRAGMENT_UNIFORM_VECTORS: 221 0 0 0 +GL_MAX_CUBE_MAP_TEXTURE_SIZE: 4096 0 0 0 +GL_MAX_RENDERBUFFER_SIZE: 4096 0 0 0 +no GL_TEXTURE_NUM_LEVELS_QCOM: GL_INVALID_ENUM + */ +static int +fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + /* Supported features (boolean caps). */ + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + return 1; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 120; + + /* Unsupported features. 
*/ + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_SCALED_RESOLVE: + case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + return 0; + + /* Stream output. */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 14; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 9192; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 20; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + /* Timer queries. 
*/ + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_QUERY_TIMESTAMP: + return 0; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + + default: + DBG("unknown param %d", param); + return 0; + } +} + +static float +fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + default: + DBG("unknown paramf %d", param); + return 0; + } +} + +static int +fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_VERTEX: + break; + case PIPE_SHADER_COMPUTE: + case PIPE_SHADER_GEOMETRY: + /* maye we could emulate.. */ + return 0; + default: + DBG("unknown shader type %d", shader); + return 0; + } + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 8; /* XXX */ + case PIPE_SHADER_CAP_MAX_INPUTS: + return 32; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* Max native temporaries. */ + case PIPE_SHADER_CAP_MAX_ADDRS: + /* XXX Isn't this equal to TEMPS? 
*/ + return 1; /* Max native address registers */ + case PIPE_SHADER_CAP_MAX_CONSTS: + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 64; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; /* nothing uses this */ + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_INTEGERS: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + default: + DBG("unknown shader param %d", param); + return 0; + } + return 0; +} + +static boolean +fd_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + (sample_count > 1) || /* TODO add MSAA */ + !util_format_is_supported(format, usage)) { + DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", + util_format_name(format), target, sample_count, usage); + return FALSE; + } + + /* TODO figure out how to render to other formats.. 
*/ + if ((usage & PIPE_BIND_RENDER_TARGET) && + ((format != PIPE_FORMAT_B8G8R8A8_UNORM) && + (format != PIPE_FORMAT_B8G8R8X8_UNORM))) { + DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x", + util_format_name(format), target, sample_count, usage); + return FALSE; + } + + if ((usage & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_VERTEX_BUFFER)) && + (fd_pipe2surface(format) != FMT_INVALID)) { + retval |= usage & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_VERTEX_BUFFER); + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + (fd_pipe2color(format) != COLORX_INVALID)) { + retval |= usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (fd_pipe2depth(format) != DEPTHX_INVALID)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (fd_pipe2index(format) != INDEX_SIZE_INVALID)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + if (retval != usage) { + DBG("not supported: format=%s, target=%d, sample_count=%d, " + "usage=%x, retval=%x", util_format_name(format), + target, sample_count, usage, retval); + } + + return retval == usage; +} + +boolean +fd_screen_bo_get_handle(struct pipe_screen *pscreen, + struct fd_bo *bo, + unsigned stride, + struct winsys_handle *whandle) +{ + whandle->stride = stride; + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + return fd_bo_get_name(bo, &whandle->handle) == 0; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = fd_bo_handle(bo); + return TRUE; + } else { + return FALSE; + } +} + +struct fd_bo * +fd_screen_bo_from_handle(struct pipe_screen *pscreen, + struct winsys_handle *whandle, + unsigned *out_stride) +{ + struct fd_screen 
*screen = fd_screen(pscreen); + struct fd_bo *bo; + + bo = fd_bo_from_name(screen->dev, whandle->handle); + if (!bo) { + DBG("ref name 0x%08x failed", whandle->handle); + return NULL; + } + + *out_stride = whandle->stride; + + return bo; +} + +struct pipe_screen * +fd_screen_create(struct fd_device *dev) +{ + struct fd_screen *screen = CALLOC_STRUCT(fd_screen); + struct pipe_screen *pscreen; + uint64_t val; + + char *fd_dbg = getenv("FD_MESA_DEBUG"); + if (fd_dbg) + fd_mesa_debug = atoi(fd_dbg); + + if (!screen) + return NULL; + + DBG(""); + + screen->dev = dev; + + // maybe this should be in context? + screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D); + + fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val); + screen->gmemsize_bytes = val; + + fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val); + screen->device_id = val; + + pscreen = &screen->base; + + pscreen->destroy = fd_screen_destroy; + pscreen->get_param = fd_screen_get_param; + pscreen->get_paramf = fd_screen_get_paramf; + pscreen->get_shader_param = fd_screen_get_shader_param; + pscreen->context_create = fd_context_create; + pscreen->is_format_supported = fd_screen_is_format_supported; + + fd_resource_screen_init(pscreen); + + pscreen->get_name = fd_screen_get_name; + pscreen->get_vendor = fd_screen_get_vendor; + + pscreen->get_timestamp = fd_screen_get_timestamp; + + pscreen->fence_reference = fd_screen_fence_ref; + pscreen->fence_signalled = fd_screen_fence_signalled; + pscreen->fence_finish = fd_screen_fence_finish; + + util_format_s3tc_init(); + + return pscreen; +} diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h new file mode 100644 index 00000000000..720ee054f89 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -0,0 +1,70 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/**
 * Driver screen object.  A pipe_screen pointer handed out by this driver
 * is cast back to this struct by fd_screen().
 */
struct fd_screen {
	/* gallium base object */
	struct pipe_screen base;

	/* value of the FD_GMEM_SIZE pipe parameter, queried at screen creation */
	uint32_t gmemsize_bytes;
	/* value of the FD_DEVICE_ID pipe parameter; used to build the screen name */
	uint32_t device_id;

	/* device handle passed to fd_screen_create() */
	struct fd_device *dev;
	/* FD_PIPE_3D pipe opened on 'dev' at screen creation */
	struct fd_pipe *pipe;

	/* offset added to the CPU time when reporting the screen timestamp */
	int64_t cpu_gpu_time_delta;
};
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_helpers.h" + +#include "freedreno_state.h" +#include "freedreno_context.h" +#include "freedreno_zsa.h" +#include "freedreno_rasterizer.h" +#include "freedreno_blend.h" +#include "freedreno_program.h" +#include "freedreno_resource.h" +#include "freedreno_texture.h" +#include "freedreno_gmem.h" +#include "freedreno_util.h" + +static void +fd_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->blend_color = *blend_color; + ctx->dirty |= FD_DIRTY_BLEND_COLOR; +} + +static void +fd_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->stencil_ref =* stencil_ref; + ctx->dirty |= FD_DIRTY_STENCIL_REF; +} + +static void +fd_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *clip) +{ + DBG("TODO: "); +} + +static void +fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->sample_mask = (uint16_t)sample_mask; + ctx->dirty |= FD_DIRTY_SAMPLE_MASK; +} + +/* notes from calim on #dri-devel: + * index==0 will be non-UBO (ie. 
glUniformXYZ()) all packed together padded + * out to vec4's + * I should be able to consider that I own the user_ptr until the next + * set_constant_buffer() call, at which point I don't really care about the + * previous values. + * index>0 will be UBO's.. well, I'll worry about that later + */ +static void +fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_constbuf_stateobj *so = &ctx->constbuf[shader]; + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (unlikely(!cb)) { + so->enabled_mask &= ~(1 << index); + so->dirty_mask &= ~(1 << index); + pipe_resource_reference(&so->cb[index].buffer, NULL); + return; + } + + pipe_resource_reference(&so->cb[index].buffer, cb->buffer); + so->cb[index].buffer_offset = cb->buffer_offset; + so->cb[index].buffer_size = cb->buffer_size; + so->cb[index].user_buffer = cb->user_buffer; + + so->enabled_mask |= 1 << index; + so->dirty_mask |= 1 << index; + ctx->dirty |= FD_DIRTY_CONSTBUF; +} + +static void +fd_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct fd_context *ctx = fd_context(pctx); + struct pipe_framebuffer_state *cso = &ctx->framebuffer.base; + unsigned i; + + DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, + cso->cbufs[0], cso->zsbuf); + + fd_context_render(pctx); + + for (i = 0; i < framebuffer->nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); + for (; i < ctx->framebuffer.base.nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], NULL); + + cso->nr_cbufs = framebuffer->nr_cbufs; + cso->width = framebuffer->width; + cso->height = framebuffer->height; + + pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf); + + if (cso->nr_cbufs > 0) + fd_gmem_calculate_tiles(pctx); + + ctx->dirty |= FD_DIRTY_FRAMEBUFFER; +} + +static void +fd_set_polygon_stipple(struct 
pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->stipple = *stipple; + ctx->dirty |= FD_DIRTY_STIPPLE; +} + +static void +fd_set_scissor_state(struct pipe_context *pctx, + const struct pipe_scissor_state *scissor) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->scissor = *scissor; + ctx->dirty |= FD_DIRTY_SCISSOR; +} + +static void +fd_set_viewport_state(struct pipe_context *pctx, + const struct pipe_viewport_state *viewport) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->viewport = *viewport; + ctx->dirty |= FD_DIRTY_VIEWPORT; +} + +static void +fd_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf; + + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + ctx->dirty |= FD_DIRTY_VERTEXBUF; +} + +static void +fd_set_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib) +{ + struct fd_context *ctx = fd_context(pctx); + + if (ib) { + pipe_resource_reference(&ctx->indexbuf.buffer, ib->buffer); + ctx->indexbuf.index_size = ib->index_size; + ctx->indexbuf.offset = ib->offset; + ctx->indexbuf.user_buffer = ib->user_buffer; + } else { + pipe_resource_reference(&ctx->indexbuf.buffer, NULL); + } + + ctx->dirty |= FD_DIRTY_INDEXBUF; +} + +void +fd_state_init(struct pipe_context *pctx) +{ + pctx->set_blend_color = fd_set_blend_color; + pctx->set_stencil_ref = fd_set_stencil_ref; + pctx->set_clip_state = fd_set_clip_state; + pctx->set_sample_mask = fd_set_sample_mask; + pctx->set_constant_buffer = fd_set_constant_buffer; + pctx->set_framebuffer_state = fd_set_framebuffer_state; + pctx->set_polygon_stipple = fd_set_polygon_stipple; + pctx->set_scissor_state = fd_set_scissor_state; + pctx->set_viewport_state = 
fd_set_viewport_state; + + pctx->set_vertex_buffers = fd_set_vertex_buffers; + pctx->set_index_buffer = fd_set_index_buffer; +} + +/* NOTE: just define the position for const regs statically.. the blob + * driver doesn't seem to change these dynamically, and I can't really + * think of a good reason to so.. + */ +#define VS_CONST_BASE 0x20 +#define PS_CONST_BASE 0x120 + +static void +emit_constants(struct fd_ringbuffer *ring, uint32_t base, + struct fd_constbuf_stateobj *constbuf, + struct fd_shader_stateobj *shader) +{ + uint32_t enabled_mask = constbuf->enabled_mask; + uint32_t start_base = base; + unsigned i; + + // XXX TODO only emit dirty consts.. but we need to keep track if + // they are clobbered by a clear, gmem2mem, or mem2gmem.. + constbuf->dirty_mask = enabled_mask; + + /* emit user constants: */ + while (enabled_mask) { + unsigned index = ffs(enabled_mask) - 1; + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = ALIGN(cb->buffer_size, 4) / 4; /* size in dwords */ + + // I expect that size should be a multiple of vec4's: + assert(size == ALIGN(size, 4)); + + /* hmm, sometimes we still seem to end up with consts bound, + * even if shader isn't using them, which ends up overwriting + * const reg's used for immediates.. 
this is a hack to work + * around that: + */ + if (shader && ((base - start_base) >= (shader->first_immediate * 4))) + break; + + if (constbuf->dirty_mask & (1 << index)) { + const uint32_t *dwords; + + if (cb->user_buffer) { + dwords = cb->user_buffer; + } else { + struct fd_resource *rsc = fd_resource(cb->buffer); + dwords = fd_bo_map(rsc->bo); + } + + dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset); + + OUT_PKT3(ring, CP_SET_CONSTANT, size + 1); + OUT_RING(ring, base); + for (i = 0; i < size; i++) + OUT_RING(ring, *(dwords++)); + + constbuf->dirty_mask &= ~(1 << index); + } + + base += size; + enabled_mask &= ~(1 << index); + } + + /* emit shader immediates: */ + if (shader) { + for (i = 0; i < shader->num_immediates; i++) { + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, base); + OUT_RING(ring, shader->immediates[i].val[0]); + OUT_RING(ring, shader->immediates[i].val[1]); + OUT_RING(ring, shader->immediates[i].val[2]); + OUT_RING(ring, shader->immediates[i].val[3]); + base += 4; + } + } +} + +/* this works at least for a220 and earlier.. 
if later gpu's gain more than + * 32 texture units, might need to bump this up to uint64_t + */ +typedef uint32_t texmask; + +static texmask +emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, + struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted) +{ + unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id); + struct fd_sampler_stateobj *sampler; + struct fd_pipe_sampler_view *view; + + if (emitted & (1 << const_idx)) + return 0; + + sampler = tex->samplers[samp_id]; + view = fd_pipe_sampler_view(tex->textures[samp_id]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 7); + OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); + + OUT_RING(ring, sampler->tex0 | view->tex0); + OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt); + OUT_RING(ring, view->tex2); + OUT_RING(ring, sampler->tex3 | view->tex3); + OUT_RING(ring, sampler->tex4); + OUT_RING(ring, sampler->tex5); + + return (1 << const_idx); +} + +static void +emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx) +{ + texmask emitted = 0; + unsigned i; + + for (i = 0; i < ctx->verttex.num_samplers; i++) + if (ctx->verttex.samplers[i]) + emitted |= emit_texture(ring, ctx, &ctx->verttex, i, emitted); + + for (i = 0; i < ctx->fragtex.num_samplers; i++) + if (ctx->fragtex.samplers[i]) + emitted |= emit_texture(ring, ctx, &ctx->fragtex, i, emitted); +} + +void +fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, + struct fd_vertex_buf *vbufs, uint32_t n) +{ + unsigned i; + + OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n)); + OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); + for (i = 0; i < n; i++) { + struct fd_resource *rsc = fd_resource(vbufs[i].prsc); + OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3); + OUT_RING (ring, vbufs[i].size); + } +} + +void +fd_emit_framebuffer_state(struct fd_ringbuffer *ring, + struct fd_framebuffer_stateobj *fb) +{ + struct pipe_framebuffer_state *pfb = &fb->base; + uint32_t reg, base; + + /* this should be true because bin_w/bin_h should be multiples of 
32: */ + assert(((fb->bin_w * fb->bin_h) % 1024) == 0); + + /* depth/stencil starts after color buffer in GMEM: */ + base = (fb->bin_w * fb->bin_h) / 1024; + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_RB_SURFACE_INFO)); + OUT_RING(ring, fb->bin_w); /* RB_SURFACE_INFO */ + OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(1) | /* RB_COLOR_INFO */ + RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(pfb->cbufs[0]->format))); + reg = RB_DEPTH_INFO_DEPTH_BASE(ALIGN(base, 4)); + if (pfb->zsbuf) + reg |= RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + OUT_RING(ring, reg); /* RB_DEPTH_INFO */ +} + +void +fd_state_emit(struct pipe_context *pctx, uint32_t dirty) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_ringbuffer *ring = ctx->ring; + + /* NOTE: we probably want to eventually refactor this so each state + * object handles emitting it's own state.. although the mapping of + * state to registers is not always orthogonal, sometimes a single + * register contains bitfields coming from multiple state objects, + * so not sure the best way to deal with that yet. 
+ */ + + if (dirty & FD_DIRTY_SAMPLE_MASK) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK)); + OUT_RING(ring, ctx->sample_mask); + } + + if (dirty & FD_DIRTY_ZSA) { + struct pipe_stencil_ref *sr = &ctx->stencil_ref; + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL)); + OUT_RING(ring, ctx->zsa->rb_depthcontrol); + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_RB_STENCILREFMASK_BF)); + OUT_RING(ring, ctx->zsa->rb_stencilrefmask_bf | + RB_STENCILREFMASK_STENCILREF(sr->ref_value[1])); + OUT_RING(ring, ctx->zsa->rb_stencilrefmask | + RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); + OUT_RING(ring, ctx->zsa->rb_alpha_ref); + } + + if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL)); + OUT_RING(ring, ctx->rasterizer->pa_cl_clip_cntl); + OUT_RING(ring, ctx->rasterizer->pa_su_sc_mode_cntl | + ctx->framebuffer.pa_su_sc_mode_cntl); + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_PA_SU_POINT_SIZE)); + OUT_RING(ring, ctx->rasterizer->pa_su_point_size); + OUT_RING(ring, ctx->rasterizer->pa_su_point_minmax); + OUT_RING(ring, ctx->rasterizer->pa_su_line_cntl); + OUT_RING(ring, ctx->rasterizer->pa_sc_line_stipple); + + OUT_PKT3(ring, CP_SET_CONSTANT, 6); + OUT_RING(ring, CP_REG(REG_PA_SU_VTX_CNTL)); + OUT_RING(ring, ctx->rasterizer->pa_su_vtx_cntl); + OUT_RING(ring, f2d(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */ + OUT_RING(ring, f2d(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */ + OUT_RING(ring, f2d(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */ + OUT_RING(ring, f2d(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */ + } + + if (dirty & FD_DIRTY_FRAMEBUFFER) + fd_emit_framebuffer_state(ring, &ctx->framebuffer); + + if (dirty & FD_DIRTY_SCISSOR) { + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(ctx->scissor.minx, /* PA_SC_WINDOW_SCISSOR_TL */ + 
ctx->scissor.miny)); + OUT_RING(ring, xy2d(ctx->scissor.maxx, /* PA_SC_WINDOW_SCISSOR_BR */ + ctx->scissor.maxy)); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + OUT_PKT3(ring, CP_SET_CONSTANT, 7); + OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, f2d(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */ + OUT_RING(ring, f2d(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */ + OUT_RING(ring, f2d(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */ + OUT_RING(ring, f2d(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */ + OUT_RING(ring, f2d(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */ + OUT_RING(ring, f2d(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL)); + OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT | + PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | + PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | + PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); + } + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) { + fd_program_validate(ctx); + fd_program_emit(ring, &ctx->prog); + } + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { + emit_constants(ring, VS_CONST_BASE * 4, + &ctx->constbuf[PIPE_SHADER_VERTEX], + (dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL); + emit_constants(ring, PS_CONST_BASE * 4, + &ctx->constbuf[PIPE_SHADER_FRAGMENT], + (dirty & FD_DIRTY_PROG) ? 
ctx->prog.fp : NULL); + } + + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL)); + OUT_RING(ring, ctx->zsa->rb_colorcontrol | ctx->blend->rb_colorcontrol); + } + + if (dirty & FD_DIRTY_BLEND) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL)); + OUT_RING(ring, ctx->blend->rb_blendcontrol); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK)); + OUT_RING(ring, ctx->blend->rb_colormask); + } + + if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) + emit_textures(ring, ctx); + + ctx->dirty &= ~dirty; +} + +/* emit per-context initialization: + */ +void +fd_state_emit_setup(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_ringbuffer *ring = ctx->ring; + + OUT_PKT0(ring, REG_TP0_CHICKEN, 1); + OUT_RING(ring, 0x00000002); + + OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); + OUT_RING(ring, 0x00007fff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_SQ_VS_CONST)); + OUT_RING(ring, SQ_VS_CONST_BASE(VS_CONST_BASE) | + SQ_VS_CONST_SIZE(0x100)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_SQ_PS_CONST)); + OUT_RING(ring, SQ_PS_CONST_BASE(PS_CONST_BASE) | + SQ_PS_CONST_SIZE(0xe0)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_VGT_MAX_VTX_INDX)); + OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */ + OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_SQ_CONTEXT_MISC)); + OUT_RING(ring, SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, 
CP_REG(REG_SQ_INTERPOLATOR_CNTL)); + OUT_RING(ring, 0xffffffff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_AA_CONFIG)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_LINE_CNTL)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET)); + OUT_RING(ring, 0x00000000); + + // XXX we change this dynamically for draw/clear.. vs gmem<->mem.. + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_MODECONTROL)); + OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_SAMPLE_POS)); + OUT_RING(ring, 0x88888888); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_DEST_MASK)); + OUT_RING(ring, 0xffffffff); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_INFO)); + OUT_RING(ring, RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) | + RB_COPY_DEST_INFO_WRITE_RED | + RB_COPY_DEST_INFO_WRITE_GREEN | + RB_COPY_DEST_INFO_WRITE_BLUE | + RB_COPY_DEST_INFO_WRITE_ALPHA); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_SQ_WRAPPING_0)); + OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */ + OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */ + + OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); + OUT_RING(ring, 0x000005d0); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x5f601000); + OUT_RING(ring, 0x00000001); + + OUT_PKT0(ring, REG_SQ_INST_STORE_MANAGMENT, 1); + OUT_RING(ring, 0x00000180); + + OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); + OUT_RING(ring, 0x00000300); + + OUT_PKT3(ring, CP_SET_SHADER_BASES, 1); + OUT_RING(ring, 0x80000180); + + /* not sure what this form of CP_SET_CONSTANT is.. 
*/ + OUT_PKT3(ring, CP_SET_CONSTANT, 13); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x469c4000); + OUT_RING(ring, 0x3f800000); + OUT_RING(ring, 0x3f000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x40000000); + OUT_RING(ring, 0x3f400000); + OUT_RING(ring, 0x3ec00000); + OUT_RING(ring, 0x3e800000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK)); + OUT_RING(ring, RB_COLOR_MASK_WRITE_RED | + RB_COLOR_MASK_WRITE_GREEN | + RB_COLOR_MASK_WRITE_BLUE | + RB_COLOR_MASK_WRITE_ALPHA); + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_RB_BLEND_RED)); + OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */ + OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ + OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ + OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */ + + fd_ringbuffer_flush(ring); + fd_ringmarker_mark(ctx->draw_start); +} diff --git a/src/gallium/drivers/freedreno/freedreno_state.h b/src/gallium/drivers/freedreno/freedreno_state.h new file mode 100644 index 00000000000..4e68448f229 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_state.h @@ -0,0 +1,53 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_STATE_H_ +#define FREEDRENO_STATE_H_ + +#include "pipe/p_context.h" + +struct fd_vertexbuf_stateobj; +struct fd_zsa_stateobj; +struct fd_framebuffer_stateobj; +struct fd_ringbuffer; + +void fd_state_init(struct pipe_context *pctx); + +struct fd_vertex_buf { + unsigned offset, size; + struct pipe_resource *prsc; +}; + +void fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, + struct fd_vertex_buf *vbufs, uint32_t n); +void fd_emit_framebuffer_state(struct fd_ringbuffer *ring, + struct fd_framebuffer_stateobj *fb); +void fd_state_emit(struct pipe_context *pctx, uint32_t dirty); +void fd_state_emit_setup(struct pipe_context *pctx); + +#endif /* FREEDRENO_STATE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_surface.c b/src/gallium/drivers/freedreno/freedreno_surface.c new file mode 100644 index 00000000000..250fe4bc0f5 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_surface.c @@ -0,0 +1,73 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "freedreno_surface.h" +#include "freedreno_resource.h" +#include "freedreno_util.h" + +#include "util/u_memory.h" +#include "util/u_inlines.h" + +struct pipe_surface * +fd_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl) +{ +// struct fd_resource* tex = fd_resource(ptex); + struct fd_surface* surface = CALLOC_STRUCT(fd_surface); + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + if (surface) { + struct pipe_surface *psurf = &surface->base; + unsigned level = surf_tmpl->u.tex.level; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, ptex); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(ptex->width0, level); + psurf->height = u_minify(ptex->height0, level); + psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + // TODO + DBG("TODO: %ux%u", psurf->width, psurf->height); + } + + return &surface->base; +} + +void +fd_surface_destroy(struct pipe_context 
*pctx, struct pipe_surface *psurf) +{ + pipe_resource_reference(&psurf->texture, NULL); + FREE(psurf); +} diff --git a/src/gallium/drivers/freedreno/freedreno_surface.h b/src/gallium/drivers/freedreno/freedreno_surface.h new file mode 100644 index 00000000000..3293f33dd84 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_surface.h @@ -0,0 +1,54 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_SURFACE_H_ +#define FREEDRENO_SURFACE_H_ + +#include "pipe/p_state.h" + +struct fd_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t pitch; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct fd_surface * +fd_surface(struct pipe_surface *psurf) +{ + return (struct fd_surface *)psurf; +} + +struct pipe_surface* fd_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl); +void fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf); + +#endif /* FREEDRENO_SURFACE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c new file mode 100644 index 00000000000..07bfbd3ce36 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -0,0 +1,286 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "freedreno_texture.h" +#include "freedreno_util.h" + +static enum sq_tex_clamp +tex_clamp(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return SQ_TEX_MIRROR_ONCE_BORDER; + default: + DBG("invalid wrap: %u", wrap); + return 0; + } +} + +static enum sq_tex_filter +tex_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return SQ_TEX_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return SQ_TEX_FILTER_BILINEAR; + default: + DBG("invalid filter: %u", filter); + return 0; + } +} + +static void * +fd_sampler_state_create(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct fd_sampler_stateobj *so = CALLOC_STRUCT(fd_sampler_stateobj); + + if (!so) + return NULL; + + so->base = *cso; + + /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */ + so->tex0 = + SQ_TEX0_CLAMP_X(tex_clamp(cso->wrap_s)) | + SQ_TEX0_CLAMP_Y(tex_clamp(cso->wrap_t)) | + SQ_TEX0_CLAMP_Z(tex_clamp(cso->wrap_r)); + + so->tex3 = + SQ_TEX3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) 
| + SQ_TEX3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)); + + so->tex4 = 0x00000000; /* ??? */ + so->tex5 = 0x00000200; /* ??? */ + + return so; +} + +static void +fd_sampler_state_delete(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static struct pipe_sampler_view * +fd_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct fd_pipe_sampler_view *so = CALLOC_STRUCT(fd_pipe_sampler_view); + struct fd_resource *rsc = fd_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->tex_resource = rsc; + so->fmt = fd_pipe2surface(cso->format); + + so->tex0 = SQ_TEX0_PITCH(rsc->pitch); + so->tex2 = + SQ_TEX2_HEIGHT(prsc->height0) | + SQ_TEX2_WIDTH(prsc->width0); + so->tex3 = fd_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + + return &so->base; +} + +static void +fd_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void bind_sampler_states(struct fd_texture_stateobj *prog, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; i++) { + prog->samplers[i] = hwcso[i]; + prog->dirty_samplers |= (1 << i); + } + + for (; i < prog->num_samplers; i++) { + prog->samplers[i] = NULL; + prog->dirty_samplers |= (1 << i); + } + + prog->num_samplers = nr; +} + +static void set_sampler_views(struct fd_texture_stateobj *prog, + unsigned nr, struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; i++) { + pipe_sampler_view_reference(&prog->textures[i], views[i]); + prog->dirty_samplers |= (1 << i); + } + + for (; i < prog->num_textures; i++) { + pipe_sampler_view_reference(&prog->textures[i], NULL); + prog->dirty_samplers |= (1 << i); + } + + prog->num_textures = nr; +} + 
+static void +fd_fragtex_sampler_states_bind(struct pipe_context *pctx, + unsigned nr, void **hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + bind_sampler_states(&ctx->fragtex, nr, hwcso); + ctx->dirty |= FD_DIRTY_FRAGTEX; +} + + +static void +fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr, + struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + set_sampler_views(&ctx->fragtex, nr, views); + ctx->dirty |= FD_DIRTY_FRAGTEX; +} + +static void +fd_verttex_sampler_states_bind(struct pipe_context *pctx, + unsigned nr, void **hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + bind_sampler_states(&ctx->verttex, nr, hwcso); + ctx->dirty |= FD_DIRTY_VERTTEX; +} + + +static void +fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr, + struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + set_sampler_views(&ctx->verttex, nr, views); + ctx->dirty |= FD_DIRTY_VERTTEX; +} + +static bool +tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1, + struct fd_texture_stateobj *tex2, unsigned samp_id2) +{ + if ((samp_id1 >= tex1->num_samplers) || + (samp_id2 >= tex2->num_samplers)) + return false; + + if ((tex1 == tex2) && (samp_id1 == samp_id2)) + return true; + + if (tex1->textures[samp_id1]->texture != tex2->textures[samp_id2]->texture) + return false; + + if (memcmp(&tex1->samplers[samp_id1]->base, &tex2->samplers[samp_id2]->base, + sizeof(tex1->samplers[samp_id1]->base))) + return false; + + return true; +} + +/* map gallium sampler-id to hw const-idx.. adreno uses a flat address + * space of samplers (const-idx), so we need to map the gallium sampler-id + * which is per-shader to a global const-idx space. 
+ */ +unsigned +fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex, + unsigned samp_id) +{ + unsigned i, const_idx = 0; + + /* TODO maybe worth having some sort of cache, because we need to + * do this loop thru all the samplers both when patching shaders + * and also when emitting sampler state.. + */ + + for (i = 0; i < ctx->verttex.num_samplers; i++) { + if (tex_cmp(&ctx->verttex, i, tex, samp_id)) + return const_idx; + const_idx++; + } + + for (i = 0; i < ctx->fragtex.num_samplers; i++) { + if (tex_cmp(&ctx->fragtex, i, tex, samp_id)) + return const_idx; + const_idx++; + } + + return const_idx; +} + +void +fd_texture_init(struct pipe_context *pctx) +{ + pctx->create_sampler_state = fd_sampler_state_create; + pctx->delete_sampler_state = fd_sampler_state_delete; + + pctx->create_sampler_view = fd_sampler_view_create; + pctx->sampler_view_destroy = fd_sampler_view_destroy; + + pctx->bind_fragment_sampler_states = fd_fragtex_sampler_states_bind; + pctx->set_fragment_sampler_views = fd_fragtex_set_sampler_views; + + pctx->bind_vertex_sampler_states = fd_verttex_sampler_states_bind; + pctx->set_vertex_sampler_views = fd_verttex_set_sampler_views; +} diff --git a/src/gallium/drivers/freedreno/freedreno_texture.h b/src/gallium/drivers/freedreno/freedreno_texture.h new file mode 100644 index 00000000000..32bdb031ae5 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_texture.h @@ -0,0 +1,61 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_TEXTURE_H_ +#define FREEDRENO_TEXTURE_H_ + +#include "pipe/p_context.h" + +#include "freedreno_context.h" +#include "freedreno_resource.h" +#include "freedreno_util.h" + +struct fd_sampler_stateobj { + struct pipe_sampler_state base; + uint32_t tex0, tex3, tex4, tex5; +}; + +struct fd_pipe_sampler_view { + struct pipe_sampler_view base; + struct fd_resource *tex_resource; + enum sq_surfaceformat fmt; + uint32_t tex0, tex2, tex3; +}; + +static INLINE struct fd_pipe_sampler_view * +fd_pipe_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct fd_pipe_sampler_view *)pview; +} + +unsigned fd_get_const_idx(struct fd_context *ctx, + struct fd_texture_stateobj *tex, unsigned samp_id); + +void fd_texture_init(struct pipe_context *pctx); + +#endif /* FREEDRENO_TEXTURE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c new file mode 100644 index 00000000000..3bc3e798e3f --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_util.c @@ -0,0 +1,351 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, 
free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "util/u_format.h" + +#include "freedreno_util.h" + +enum sq_surfaceformat +fd_pipe2surface(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_A8_SNORM: + case PIPE_FORMAT_A8_UINT: + case PIPE_FORMAT_A8_SINT: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_I8_SNORM: + case PIPE_FORMAT_I8_UINT: + case PIPE_FORMAT_I8_SINT: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SNORM: + case PIPE_FORMAT_L8_UINT: + case PIPE_FORMAT_L8_SINT: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R8_SINT: + return FMT_8; + + /* 16-bit buffers. 
*/ + case PIPE_FORMAT_B5G6R5_UNORM: + return FMT_5_6_5; + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return FMT_1_5_5_5; + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return FMT_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return FMT_16; + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SNORM: + case PIPE_FORMAT_L8A8_UINT: + case PIPE_FORMAT_L8A8_SINT: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R8G8_SINT: + return FMT_8_8; + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_A16_UNORM: + case PIPE_FORMAT_A16_SNORM: + case PIPE_FORMAT_A16_UINT: + case PIPE_FORMAT_A16_SINT: + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_L16_SNORM: + case PIPE_FORMAT_L16_UINT: + case PIPE_FORMAT_L16_SINT: + case PIPE_FORMAT_I16_UNORM: + case PIPE_FORMAT_I16_SNORM: + case PIPE_FORMAT_I16_UINT: + case PIPE_FORMAT_I16_SINT: + return FMT_16; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_A16_FLOAT: + case PIPE_FORMAT_L16_FLOAT: + case PIPE_FORMAT_I16_FLOAT: + return FMT_16_FLOAT; + + /* 32-bit buffers. 
*/ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SINT: + case PIPE_FORMAT_R8G8B8A8_UINT: + return FMT_8_8_8_8; + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UINT: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return FMT_2_10_10_10; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return FMT_24_8; + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_A32_UINT: + case PIPE_FORMAT_A32_SINT: + case PIPE_FORMAT_L32_UINT: + case PIPE_FORMAT_L32_SINT: + case PIPE_FORMAT_I32_UINT: + case PIPE_FORMAT_I32_SINT: + return FMT_32; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_A32_FLOAT: + case PIPE_FORMAT_L32_FLOAT: + case PIPE_FORMAT_I32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: + return FMT_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_L16A16_FLOAT: + return FMT_16_16_FLOAT; + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UINT: + case PIPE_FORMAT_R16G16_SINT: + case PIPE_FORMAT_L16A16_UNORM: + case PIPE_FORMAT_L16A16_SNORM: + case PIPE_FORMAT_L16A16_UINT: + case PIPE_FORMAT_L16A16_SINT: + return FMT_16_16; + + /* 64-bit buffers. 
*/ + case PIPE_FORMAT_R16G16B16A16_UINT: + case PIPE_FORMAT_R16G16B16A16_SINT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return FMT_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return FMT_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_L32A32_FLOAT: + return FMT_32_32_FLOAT; + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_L32A32_UINT: + case PIPE_FORMAT_L32A32_SINT: + return FMT_32_32; + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return FMT_32_32_32_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32G32B32A32_SINT: + case PIPE_FORMAT_R32G32B32A32_UINT: + return FMT_32_32_32_32; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return FMT_32_32_32_32_FLOAT; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: + return FMT_Cr_Y1_Cb_Y0; + case PIPE_FORMAT_YUYV: + return FMT_Y1_Cr_Y0_Cb; + + default: + return FMT_INVALID; + } +} + +enum rb_colorformatx +fd_pipe2color(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_A8_SNORM: + case PIPE_FORMAT_A8_UINT: + case PIPE_FORMAT_A8_SINT: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_I8_SNORM: + case PIPE_FORMAT_I8_UINT: + case PIPE_FORMAT_I8_SINT: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SNORM: + case PIPE_FORMAT_L8_UINT: + case PIPE_FORMAT_L8_SINT: + case PIPE_FORMAT_L8_SRGB: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R8_SINT: + return COLORX_8; + + /* 16-bit buffers. 
*/ + case PIPE_FORMAT_B5G6R5_UNORM: + return COLORX_5_6_5; + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return COLORX_1_5_5_5; + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return COLORX_4_4_4_4; + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SNORM: + case PIPE_FORMAT_L8A8_UINT: + case PIPE_FORMAT_L8A8_SINT: + case PIPE_FORMAT_L8A8_SRGB: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R8G8_SINT: + case PIPE_FORMAT_Z16_UNORM: + return COLORX_8_8; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_A16_FLOAT: + case PIPE_FORMAT_L16_FLOAT: + case PIPE_FORMAT_I16_FLOAT: + return COLORX_16_FLOAT; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SINT: + case PIPE_FORMAT_R8G8B8A8_UINT: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + return COLORX_8_8_8_8; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_A32_FLOAT: + case PIPE_FORMAT_L32_FLOAT: + case PIPE_FORMAT_I32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: + return COLORX_32_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_L16A16_FLOAT: + return COLORX_16_16_FLOAT; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return COLORX_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_L32A32_FLOAT: + return COLORX_32_32_FLOAT; + + /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return COLORX_32_32_32_32_FLOAT; + + default: + return COLORX_INVALID; + } +} + +enum rb_depth_format +fd_pipe2depth(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return DEPTHX_16; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return DEPTHX_24_8; + default: + return DEPTHX_INVALID; + } +} + +enum pc_di_index_size +fd_pipe2index(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_I8_UINT: + return INDEX_SIZE_8_BIT; + case PIPE_FORMAT_I16_UINT: + return INDEX_SIZE_16_BIT; + case PIPE_FORMAT_I32_UINT: + return INDEX_SIZE_32_BIT; + default: + return INDEX_SIZE_INVALID; + } +} + +static inline enum sq_tex_swiz +tex_swiz(unsigned swiz) +{ + switch (swiz) { + default: + case PIPE_SWIZZLE_RED: return SQ_TEX_X; + case PIPE_SWIZZLE_GREEN: return SQ_TEX_Y; + case PIPE_SWIZZLE_BLUE: return SQ_TEX_Z; + case PIPE_SWIZZLE_ALPHA: return SQ_TEX_W; + case PIPE_SWIZZLE_ZERO: return SQ_TEX_ZERO; + case PIPE_SWIZZLE_ONE: return SQ_TEX_ONE; + } +} + +uint32_t +fd_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, + unsigned swizzle_b, unsigned swizzle_a) +{ + const struct util_format_description *desc = + util_format_description(format); + uint8_t swiz[] = { + swizzle_r, swizzle_g, swizzle_b, swizzle_a, + PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE, + PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE, + }; + + return SQ_TEX3_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) | + SQ_TEX3_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) | + SQ_TEX3_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) | + SQ_TEX3_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]])); +} diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h new file mode 100644 index 00000000000..fb1e392bc1e --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -0,0 +1,124 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 
2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_UTIL_H_ +#define FREEDRENO_UTIL_H_ + +#include <freedreno_drmif.h> +#include <freedreno_ringbuffer.h> + +#include "pipe/p_format.h" +#include "util/u_debug.h" + +#include "freedreno_pm4.h" +#include "freedreno_a2xx_reg.h" + +enum sq_surfaceformat fd_pipe2surface(enum pipe_format format); +enum rb_colorformatx fd_pipe2color(enum pipe_format format); +enum rb_depth_format fd_pipe2depth(enum pipe_format format); +enum pc_di_index_size fd_pipe2index(enum pipe_format format); +uint32_t fd_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + + +#define FD_DBG_MSGS 0x1 +#define FD_DBG_DISASM 0x2 +extern int fd_mesa_debug; + +#define DBG(fmt, ...) 
\ + do { if (fd_mesa_debug & FD_DBG_MSGS) \ + debug_printf("%s:%d: "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + +#define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1)) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + + +#define LOG_DWORDS 0 + + +static inline void +OUT_RING(struct fd_ringbuffer *ring, uint32_t data) +{ + if (LOG_DWORDS) { + DBG("ring[%p]: OUT_RING %04x: %08x", ring, + (uint32_t)(ring->cur - ring->last_start), data); + } + *(ring->cur++) = data; +} + +static inline void +OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint32_t or) +{ + if (LOG_DWORDS) { + DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring, + (uint32_t)(ring->cur - ring->last_start), bo, offset); + } + fd_ringbuffer_emit_reloc(ring, bo, offset, or); +} + +static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) +{ + if ((ring->cur + ndwords) >= ring->end) { + /* this probably won't really work if we have multiple tiles.. + * but it is ok for 2d.. we might need different behavior + * depending on 2d or 3d pipe. 
+ */ + DBG("uh oh.."); + } +} + +static inline void +OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); +} + +static inline void +OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); +} + +static inline void +OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, + struct fd_ringmarker *end) +{ + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + fd_ringbuffer_emit_reloc_ring(ring, start); + OUT_RING(ring, fd_ringmarker_dwords(start, end)); +} + +#endif /* FREEDRENO_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.c b/src/gallium/drivers/freedreno/freedreno_vbo.c new file mode 100644 index 00000000000..fc33539e213 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_vbo.c @@ -0,0 +1,232 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_vbo.h" +#include "freedreno_context.h" +#include "freedreno_state.h" +#include "freedreno_zsa.h" +#include "freedreno_resource.h" +#include "freedreno_util.h" + + +static void * +fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct fd_vertex_stateobj *so = CALLOC_STRUCT(fd_vertex_stateobj); + + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + return so; +} + +static void +fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +static void +fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->vtx = hwcso; + ctx->dirty |= FD_DIRTY_VTX; +} + +static void +emit_cacheflush(struct fd_ringbuffer *ring) +{ + unsigned i; + + for (i = 0; i < 12; i++) { + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + } +} + +static enum pc_di_primtype +mode2primtype(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return DI_PT_POINTLIST; + case PIPE_PRIM_LINES: return DI_PT_LINELIST; + case PIPE_PRIM_LINE_STRIP: return DI_PT_LINESTRIP; + case PIPE_PRIM_TRIANGLES: return DI_PT_TRILIST; + case PIPE_PRIM_TRIANGLE_STRIP: return DI_PT_TRISTRIP; + case PIPE_PRIM_TRIANGLE_FAN: return DI_PT_TRIFAN; + case PIPE_PRIM_QUADS: return DI_PT_QUADLIST; + case PIPE_PRIM_QUAD_STRIP: return DI_PT_QUADSTRIP; + case PIPE_PRIM_POLYGON: return DI_PT_POLYGON; + } + 
DBG("unsupported mode: (%s) %d", u_prim_name(mode), mode); + assert(0); + return DI_PT_NONE; +} + +static enum pc_di_index_size +size2indextype(unsigned index_size) +{ + switch (index_size) { + case 1: return INDEX_SIZE_8_BIT; + case 2: return INDEX_SIZE_16_BIT; + case 4: return INDEX_SIZE_32_BIT; + } + DBG("unsupported index size: %d", index_size); + assert(0); + return INDEX_SIZE_IGN; +} + +static void +emit_vertexbufs(struct fd_context *ctx, unsigned count) +{ + struct fd_vertex_stateobj *vtx = ctx->vtx; + struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; + struct fd_vertex_buf bufs[PIPE_MAX_ATTRIBS]; + unsigned i; + + if (!vtx->num_elements) + return; + + for (i = 0; i < vtx->num_elements; i++) { + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &vertexbuf->vb[elem->vertex_buffer_index]; + bufs[i].offset = vb->buffer_offset; + bufs[i].size = count * vb->stride; + bufs[i].prsc = vb->buffer; + } + + // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the + // CONST(20,0) (or CONST(26,0) in soliv_vp) + + fd_emit_vertex_bufs(ctx->ring, 0x78, bufs, vtx->num_elements); +} + +static void +fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + struct fd_context *ctx = fd_context(pctx); + struct pipe_framebuffer_state *fb = &ctx->framebuffer.base; + struct fd_ringbuffer *ring = ctx->ring; + struct fd_bo *idx_bo = NULL; + enum pc_di_index_size idx_type = INDEX_SIZE_IGN; + enum pc_di_src_sel src_sel; + uint32_t idx_size, idx_offset; + unsigned buffers; + + ctx->needs_flush = true; + + if (info->indexed) { + struct pipe_index_buffer *idx = &ctx->indexbuf; + + assert(!idx->user_buffer); + + idx_bo = fd_resource(idx->buffer)->bo; + idx_type = size2indextype(idx->index_size); + idx_size = idx->index_size * info->count; + idx_offset = idx->offset; + src_sel = DI_SRC_SEL_DMA; + } else { + idx_bo = NULL; + idx_type = INDEX_SIZE_IGN; + idx_size = 0; + idx_offset = 0; + src_sel = DI_SRC_SEL_AUTO_INDEX; 
+ } + + fd_resource(fb->cbufs[0]->texture)->dirty = true; + + /* figure out the buffers we need: */ + buffers = FD_BUFFER_COLOR; + if (fd_depth_enabled(ctx->zsa)) { + buffers |= FD_BUFFER_DEPTH; + fd_resource(fb->zsbuf->texture)->dirty = true; + } + if (fd_stencil_enabled(ctx->zsa)) { + buffers |= FD_BUFFER_STENCIL; + fd_resource(fb->zsbuf->texture)->dirty = true; + } + + /* any buffers that haven't been cleared, we need to restore: */ + ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared); + /* and any buffers used, need to be resolved: */ + ctx->resolve |= buffers; + + fd_state_emit(pctx, ctx->dirty); + + emit_vertexbufs(ctx, info->count); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET)); + OUT_RING(ring, info->start); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); + + OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1); + OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE); + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x0000000); + + OUT_PKT3(ring, CP_DRAW_INDX, info->indexed ? 5 : 3); + OUT_RING(ring, 0x00000000); /* viz query info. 
*/ + OUT_RING(ring, DRAW(mode2primtype(info->mode), + src_sel, idx_type, IGNORE_VISIBILITY)); + OUT_RING(ring, info->count); /* NumIndices */ + if (info->indexed) { + OUT_RELOC(ring, idx_bo, idx_offset, 0); + OUT_RING (ring, idx_size); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_2010)); + OUT_RING(ring, 0x00000000); + + emit_cacheflush(ring); +} + +void +fd_vbo_init(struct pipe_context *pctx) +{ + pctx->create_vertex_elements_state = fd_vertex_state_create; + pctx->delete_vertex_elements_state = fd_vertex_state_delete; + pctx->bind_vertex_elements_state = fd_vertex_state_bind; + pctx->draw_vbo = fd_draw_vbo; +} diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.h b/src/gallium/drivers/freedreno/freedreno_vbo.h new file mode 100644 index 00000000000..081edf5b8b5 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_vbo.h @@ -0,0 +1,42 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_VBO_H_ +#define FREEDRENO_VBO_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd_vertex_stateobj { + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned num_elements; +}; + +void fd_vbo_init(struct pipe_context *pctx); + +#endif /* FREEDRENO_VBO_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.c b/src/gallium/drivers/freedreno/freedreno_zsa.c new file mode 100644 index 00000000000..e8daa37a358 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_zsa.c @@ -0,0 +1,144 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "freedreno_zsa.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +static enum rb_stencil_op +stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCIL_INCR_CLAMP; + case PIPE_STENCIL_OP_DECR: + return STENCIL_DECR_CLAMP; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCIL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return STENCIL_INVERT; + default: + DBG("invalid stencil op: %u", op); + return 0; + } +} + +static void * +fd_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->rb_depthcontrol |= + RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depthcontrol |= RB_DEPTHCONTROL_Z_ENABLE; + if (cso->depth.writemask) + so->rb_depthcontrol |= RB_DEPTHCONTROL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_depthcontrol |= + RB_DEPTHCONTROL_STENCIL_ENABLE | + RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */ + RB_DEPTHCONTROL_STENCILFAIL(stencil_op(s->fail_op)) | + RB_DEPTHCONTROL_STENCILZPASS(stencil_op(s->zpass_op)) | + RB_DEPTHCONTROL_STENCILZFAIL(stencil_op(s->zfail_op)); + so->rb_stencilrefmask 
|= + 0xff000000 | /* ??? */ + RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | + RB_STENCILREFMASK_STENCILMASK(s->valuemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + so->rb_depthcontrol |= + RB_DEPTHCONTROL_BACKFACE_ENABLE | + RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */ + RB_DEPTHCONTROL_STENCILFAIL_BF(stencil_op(bs->fail_op)) | + RB_DEPTHCONTROL_STENCILZPASS_BF(stencil_op(bs->zpass_op)) | + RB_DEPTHCONTROL_STENCILZFAIL_BF(stencil_op(bs->zfail_op)); + so->rb_stencilrefmask_bf |= + 0xff000000 | /* ??? */ + RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) | + RB_STENCILREFMASK_STENCILMASK(bs->valuemask); + } + } + + if (cso->alpha.enabled) { + so->rb_colorcontrol = + RB_COLORCONTROL_ALPHA_FUNC(cso->alpha.func) | + RB_COLORCONTROL_ALPHA_TEST_ENABLE; + so->rb_alpha_ref = f2d(cso->alpha.ref_value); + } + + return so; +} + +static void +fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->zsa = hwcso; + ctx->dirty |= FD_DIRTY_ZSA; +} + +static void +fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso) +{ + FREE(hwcso); +} + +void +fd_zsa_init(struct pipe_context *pctx) +{ + pctx->create_depth_stencil_alpha_state = fd_zsa_state_create; + pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind; + pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete; +} diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.h b/src/gallium/drivers/freedreno/freedreno_zsa.h new file mode 100644 index 00000000000..d1112f1572f --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_zsa.h @@ -0,0 +1,60 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FREEDRENO_ZSA_H_ +#define FREEDRENO_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + /* Depth/stencil/alpha state object: a copy of the gallium state plus the register words fd_zsa_state_create() derives from it. */ +struct fd_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; /* copy of the state passed to create */ + uint32_t rb_depthcontrol; /* Z func/enable/write-enable plus front and back stencil func and ops */ + uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */ + uint32_t rb_alpha_ref; /* f2d(alpha.ref_value); written only when the alpha test is enabled */ + uint32_t rb_stencilrefmask; /* front-face write mask and value mask; written only when stencil[0] is enabled */ + uint32_t rb_stencilrefmask_bf; /* back-face equivalent; written only when stencil[0] and stencil[1] are both enabled */ +}; + /* Installs the create/bind/delete zsa callbacks on pctx. */ +void fd_zsa_init(struct pipe_context *pctx); + /* True when the precomputed depth-control word has the Z_ENABLE bit set. */ +static inline bool fd_depth_enabled(struct fd_zsa_stateobj *zsa) +{ + return !!(zsa->rb_depthcontrol & RB_DEPTHCONTROL_Z_ENABLE); +} + /* True when the precomputed depth-control word has the STENCIL_ENABLE bit set. */ +static inline bool fd_stencil_enabled(struct fd_zsa_stateobj *zsa) +{ + //RB_DEPTHCONTROL_STENCIL_ENABLE + return !!(zsa->rb_depthcontrol & RB_DEPTHCONTROL_STENCIL_ENABLE); +} + +#endif /* FREEDRENO_ZSA_H_ */ diff --git a/src/gallium/drivers/freedreno/instr.h b/src/gallium/drivers/freedreno/instr.h new file mode 100644 index 00000000000..fd192342c93 --- /dev/null +++ b/src/gallium/drivers/freedreno/instr.h 
@@ -0,0 +1,386 @@ +/* + * Copyright (c) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef INSTR_H_ +#define INSTR_H_ + +#include "freedreno_a2xx_reg.h" + +#define PACKED __attribute__((__packed__)) + +/* + * ALU instructions: + * + * instr_alu_t describes one ALU instruction as three dwords; the bitfield + * widths listed under each "dwordN" marker add up to 32, so fields must + * not be reordered or resized. + */ + +typedef enum { + ADDs = 0, + ADD_PREVs = 1, + MULs = 2, + MUL_PREVs = 3, + MUL_PREV2s = 4, + MAXs = 5, + MINs = 6, + SETEs = 7, + SETGTs = 8, + SETGTEs = 9, + SETNEs = 10, + FRACs = 11, + TRUNCs = 12, + FLOORs = 13, + EXP_IEEE = 14, + LOG_CLAMP = 15, + LOG_IEEE = 16, + RECIP_CLAMP = 17, + RECIP_FF = 18, + RECIP_IEEE = 19, + RECIPSQ_CLAMP = 20, + RECIPSQ_FF = 21, + RECIPSQ_IEEE = 22, + MOVAs = 23, + MOVA_FLOORs = 24, + SUBs = 25, + SUB_PREVs = 26, + PRED_SETEs = 27, + PRED_SETNEs = 28, + PRED_SETGTs = 29, + PRED_SETGTEs = 30, + PRED_SET_INVs = 31, + PRED_SET_POPs = 32, + PRED_SET_CLRs = 33, + PRED_SET_RESTOREs = 34, + KILLEs = 35, + KILLGTs = 36, + KILLGTEs = 37, + KILLNEs = 38, + KILLONEs = 39, + SQRT_IEEE = 40, /* note: 41 is not assigned in this table */ + MUL_CONST_0 = 42, + MUL_CONST_1 = 43, + ADD_CONST_0 = 44, + ADD_CONST_1 = 45, + SUB_CONST_0 = 46, + SUB_CONST_1 = 47, + SIN = 48, + COS = 49, + RETAIN_PREV = 50, +} instr_scalar_opc_t; + +typedef enum { + ADDv = 0, + MULv = 1, + MAXv = 2, + MINv = 3, + SETEv = 4, + SETGTv = 5, + SETGTEv = 6, + SETNEv = 7, + FRACv = 8, + TRUNCv = 9, + FLOORv = 10, + MULADDv = 11, + CNDEv = 12, + CNDGTEv = 13, + CNDGTv = 14, + DOT4v = 15, + DOT3v = 16, + DOT2ADDv = 17, + CUBEv = 18, + MAX4v = 19, + PRED_SETE_PUSHv = 20, + PRED_SETNE_PUSHv = 21, + PRED_SETGT_PUSHv = 22, + PRED_SETGTE_PUSHv = 23, + KILLEv = 24, + KILLGTv = 25, + KILLGTEv = 26, + KILLNEv = 27, + DSTv = 28, + MOVAv = 29, +} instr_vector_opc_t; + +typedef struct PACKED { + /* dword0: */ + uint8_t vector_dest : 6; + uint8_t vector_dest_rel : 1; + uint8_t low_precision_16b_fp : 1; + uint8_t scalar_dest : 6; + uint8_t scalar_dest_rel : 1; + uint8_t export_data : 1; + uint8_t vector_write_mask : 4; + uint8_t scalar_write_mask : 4; + uint8_t vector_clamp : 1; + uint8_t scalar_clamp : 1; + instr_scalar_opc_t scalar_opc : 6; + /* dword1: */ + uint8_t src3_swiz : 
8; + uint8_t src2_swiz : 8; + uint8_t src1_swiz : 8; + uint8_t src3_reg_negate : 1; + uint8_t src2_reg_negate : 1; + uint8_t src1_reg_negate : 1; + uint8_t pred_select : 2; + uint8_t relative_addr : 1; + uint8_t const_1_rel_abs : 1; + uint8_t const_0_rel_abs : 1; + /* dword2: */ + uint8_t src3_reg : 6; + uint8_t src3_reg_select : 1; + uint8_t src3_reg_abs : 1; + uint8_t src2_reg : 6; + uint8_t src2_reg_select : 1; + uint8_t src2_reg_abs : 1; + uint8_t src1_reg : 6; + uint8_t src1_reg_select : 1; + uint8_t src1_reg_abs : 1; + instr_vector_opc_t vector_opc : 5; + uint8_t src3_sel : 1; + uint8_t src2_sel : 1; + uint8_t src1_sel : 1; +} instr_alu_t; + + + +/* + * CF instructions: + * + * Every variant below is 48 bits wide (the bitfield widths sum to 48) and + * ends with the 4-bit opcode; ir.c emits them in pairs, i.e. 3 dwords. + */ + +typedef enum { + NOP = 0, + EXEC = 1, + EXEC_END = 2, + COND_EXEC = 3, + COND_EXEC_END = 4, + COND_PRED_EXEC = 5, + COND_PRED_EXEC_END = 6, + LOOP_START = 7, + LOOP_END = 8, + COND_CALL = 9, + RETURN = 10, + COND_JMP = 11, + ALLOC = 12, + COND_EXEC_PRED_CLEAN = 13, + COND_EXEC_PRED_CLEAN_END = 14, + MARK_VS_FETCH_DONE = 15, +} instr_cf_opc_t; + +typedef enum { + RELATIVE_ADDR = 0, + ABSOLUTE_ADDR = 1, +} instr_addr_mode_t; + +typedef enum { + SQ_NO_ALLOC = 0, + SQ_POSITION = 1, + SQ_PARAMETER_PIXEL = 2, + SQ_MEMORY = 3, +} instr_alloc_type_t; + +typedef struct PACKED { + uint16_t address : 9; + uint8_t reserved0 : 3; + uint8_t count : 3; + uint8_t yeild : 1; /* sic ("yield"); name kept as declared because it is part of the struct's interface */ + uint16_t serialize : 12; + uint8_t vc : 6; /* vertex cache? 
*/ + uint8_t bool_addr : 8; + uint8_t condition : 1; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_exec_t; + +typedef struct PACKED { + uint16_t address : 10; + uint8_t reserved0 : 6; + uint8_t loop_id : 5; + uint32_t reserved1 : 22; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_loop_t; + +typedef struct PACKED { + uint16_t address : 10; + uint8_t reserved0 : 3; + uint8_t force_call : 1; + uint8_t predicated_jmp : 1; + uint32_t reserved1 : 18; + uint8_t direction : 1; + uint8_t bool_addr : 8; + uint8_t condition : 1; + instr_addr_mode_t address_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_jmp_call_t; + +typedef struct PACKED { + uint8_t size : 4; + uint64_t reserved0 : 36; + uint8_t no_serial : 1; + instr_alloc_type_t buffer_select : 2; + uint8_t alloc_mode : 1; + instr_cf_opc_t opc : 4; +} instr_cf_alloc_t; + +typedef union PACKED { + instr_cf_exec_t exec; + instr_cf_loop_t loop; + instr_cf_jmp_call_t jmp_call; + instr_cf_alloc_t alloc; + struct PACKED { /* generic view: 44 filler bits, then the opcode every variant ends with */ + uint64_t dummy : 44; + instr_cf_opc_t opc : 4; + }; +} instr_cf_t; + + + +/* + * FETCH instructions: + * + * Both fetch layouts (tex and vtx) are three dwords and start with the + * 5-bit opcode. + */ + +typedef enum { + VTX_FETCH = 0, + TEX_FETCH = 1, + TEX_GET_BORDER_COLOR_FRAC = 16, + TEX_GET_COMP_TEX_LOD = 17, + TEX_GET_GRADIENTS = 18, + TEX_GET_WEIGHTS = 19, + TEX_SET_TEX_LOD = 24, + TEX_SET_GRADIENTS_H = 25, + TEX_SET_GRADIENTS_V = 26, + TEX_RESERVED_4 = 27, +} instr_fetch_opc_t; + +typedef enum { + TEX_FILTER_POINT = 0, + TEX_FILTER_LINEAR = 1, + TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */ + TEX_FILTER_USE_FETCH_CONST = 3, +} instr_tex_filter_t; + +typedef enum { + ANISO_FILTER_DISABLED = 0, + ANISO_FILTER_MAX_1_1 = 1, + ANISO_FILTER_MAX_2_1 = 2, + ANISO_FILTER_MAX_4_1 = 3, + ANISO_FILTER_MAX_8_1 = 4, + ANISO_FILTER_MAX_16_1 = 5, + ANISO_FILTER_USE_FETCH_CONST = 7, +} instr_aniso_filter_t; + +typedef enum { + ARBITRARY_FILTER_2X4_SYM = 0, + ARBITRARY_FILTER_2X4_ASYM = 1, + ARBITRARY_FILTER_4X2_SYM = 2, + 
ARBITRARY_FILTER_4X2_ASYM = 3, + ARBITRARY_FILTER_4X4_SYM = 4, + ARBITRARY_FILTER_4X4_ASYM = 5, + ARBITRARY_FILTER_USE_FETCH_CONST = 7, +} instr_arbitrary_filter_t; + +typedef enum { + SAMPLE_CENTROID = 0, + SAMPLE_CENTER = 1, +} instr_sample_loc_t; + +typedef enum sq_surfaceformat instr_surf_fmt_t; + +typedef struct PACKED { + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint8_t src_reg : 6; + uint8_t src_reg_am : 1; + uint8_t dst_reg : 6; + uint8_t dst_reg_am : 1; + uint8_t fetch_valid_only : 1; + uint8_t const_idx : 5; + uint8_t tx_coord_denorm : 1; + uint8_t src_swiz : 6; /* three 2-bit channel selects, as built by reg_fetch_src_swiz(reg, 3) in ir.c */ + /* dword1: */ + uint16_t dst_swiz : 12; /* four 3-bit selects, as built by reg_fetch_dst_swiz() in ir.c */ + instr_tex_filter_t mag_filter : 2; + instr_tex_filter_t min_filter : 2; + instr_tex_filter_t mip_filter : 2; + instr_aniso_filter_t aniso_filter : 3; + instr_arbitrary_filter_t arbitrary_filter : 3; + instr_tex_filter_t vol_mag_filter : 2; + instr_tex_filter_t vol_min_filter : 2; + uint8_t use_comp_lod : 1; + uint8_t use_reg_lod : 2; + uint8_t pred_select : 1; + /* dword2: */ + uint8_t use_reg_gradients : 1; + instr_sample_loc_t sample_location : 1; + uint8_t lod_bias : 7; + uint8_t unused : 7; + uint8_t offset_x : 5; + uint8_t offset_y : 5; + uint8_t offset_z : 5; + uint8_t pred_condition : 1; +} instr_fetch_tex_t; + +typedef struct PACKED { + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint8_t src_reg : 6; + uint8_t src_reg_am : 1; + uint8_t dst_reg : 6; + uint8_t dst_reg_am : 1; + uint8_t must_be_one : 1; + uint8_t const_index : 5; + uint8_t const_index_sel : 2; + uint8_t reserved0 : 3; + uint8_t src_swiz : 2; + /* dword1: */ + uint16_t dst_swiz : 12; + uint8_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? 
*/ + uint8_t num_format_all : 1; /* '0' for normalized, '1' for unnormalized */ + uint8_t signed_rf_mode_all : 1; + uint8_t reserved1 : 1; + instr_surf_fmt_t format : 6; + uint8_t reserved2 : 1; + uint8_t exp_adjust_all : 7; + uint8_t reserved3 : 1; + uint8_t pred_select : 1; + /* dword2: */ + uint8_t stride : 8; + /* possibly offset and reserved4 are swapped on a200? */ + uint8_t offset : 8; + uint8_t reserved4 : 8; + uint8_t reserved5 : 7; + uint8_t pred_condition : 1; +} instr_fetch_vtx_t; + +typedef union PACKED { + instr_fetch_tex_t tex; + instr_fetch_vtx_t vtx; + struct PACKED { /* generic view: lets opc be accessed without choosing tex or vtx */ + /* dword0: */ + instr_fetch_opc_t opc : 5; + uint32_t dummy0 : 27; + /* dword1: */ + uint32_t dummy1 : 32; + /* dword2: */ + uint32_t dummy2 : 32; + }; +} instr_fetch_t; + +#endif /* INSTR_H_ */ diff --git a/src/gallium/drivers/freedreno/ir.c b/src/gallium/drivers/freedreno/ir.c new file mode 100644 index 00000000000..cbc1230fa77 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir.c @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "freedreno_util.h" +#include "instr.h" + +#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) +#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) +#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__) + +#define REG_MASK 0x3f + +static int cf_emit(struct ir_cf *cf, instr_cf_t *instr); + +static int instr_emit(struct ir_instruction *instr, uint32_t *dwords, + uint32_t idx, struct ir_shader_info *info); + +static void reg_update_stats(struct ir_register *reg, + struct ir_shader_info *info, bool dest); +static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n); +static uint32_t reg_fetch_dst_swiz(struct ir_register *reg); +static uint32_t reg_alu_dst_swiz(struct ir_register *reg); +static uint32_t reg_alu_src_swiz(struct ir_register *reg); + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot. 
+ */ +static void * ir_alloc(struct ir_shader *shader, int sz) +{ + void *ptr = &shader->heap[shader->heap_idx]; + shader->heap_idx += ALIGN(sz, 4); + return ptr; +} + +static char * ir_strdup(struct ir_shader *shader, const char *str) +{ + char *ptr = NULL; + if (str) { + int len = strlen(str); + ptr = ir_alloc(shader, len+1); + memcpy(ptr, str, len); + ptr[len] = '\0'; + } + return ptr; +} + +struct ir_shader * ir_shader_create(void) +{ + DEBUG_MSG(""); + return calloc(1, sizeof(struct ir_shader)); +} + +void ir_shader_destroy(struct ir_shader *shader) +{ + DEBUG_MSG(""); + free(shader); +} + +/* resolve addr/cnt/sequence fields in the individual CF's */ +static int shader_resolve(struct ir_shader *shader, struct ir_shader_info *info) +{ + uint32_t addr; + unsigned i; + int j; + + addr = shader->cfs_count / 2; + for (i = 0; i < shader->cfs_count; i++) { + struct ir_cf *cf = shader->cfs[i]; + if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { + uint32_t sequence = 0; + + if (cf->exec.addr && (cf->exec.addr != addr)) + WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); + if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) + WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); + + for (j = cf->exec.instrs_count - 1; j >= 0; j--) { + struct ir_instruction *instr = cf->exec.instrs[j]; + sequence <<= 2; + if (instr->instr_type == IR_FETCH) + sequence |= 0x1; + if (instr->sync) + sequence |= 0x2; + } + + cf->exec.addr = addr; + cf->exec.cnt = cf->exec.instrs_count; + cf->exec.sequence = sequence; + + addr += cf->exec.instrs_count; + } + } + + info->sizedwords = 3 * addr; + + return 0; +} + +void * ir_shader_assemble(struct ir_shader *shader, struct ir_shader_info *info) +{ + uint32_t i, j; + uint32_t *ptr, *dwords = NULL; + uint32_t idx = 0; + int ret; + + info->sizedwords = 0; + info->max_reg = -1; + info->max_input_reg = 0; + info->regs_written = 0; + + /* we need an even # of CF's.. 
insert a NOP if needed */ + if (shader->cfs_count != ALIGN(shader->cfs_count, 2)) + ir_cf_create(shader, NOP); + + /* first pass, resolve sizes and addresses: */ + ret = shader_resolve(shader, info); + if (ret) { + ERROR_MSG("resolve failed: %d", ret); + goto fail; + } + + ptr = dwords = calloc(1, 4 * info->sizedwords); + + /* second pass, emit CF program in pairs: */ + for (i = 0; i < shader->cfs_count; i += 2) { + instr_cf_t *cfs = (instr_cf_t *)ptr; + ret = cf_emit(shader->cfs[i], &cfs[0]); + if (ret) { + ERROR_MSG("CF emit failed: %d\n", ret); + goto fail; + } + ret = cf_emit(shader->cfs[i+1], &cfs[1]); + if (ret) { + ERROR_MSG("CF emit failed: %d\n", ret); + goto fail; + } + ptr += 3; + assert((ptr - dwords) <= info->sizedwords); + } + + /* third pass, emit ALU/FETCH: */ + for (i = 0; i < shader->cfs_count; i++) { + struct ir_cf *cf = shader->cfs[i]; + if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { + for (j = 0; j < cf->exec.instrs_count; j++) { + ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); + if (ret) { + ERROR_MSG("instruction emit failed: %d", ret); + goto fail; + } + ptr += 3; + assert((ptr - dwords) <= info->sizedwords); + } + } + } + + return dwords; + +fail: + free(dwords); + return NULL; +} + + +struct ir_attribute * ir_attribute_create(struct ir_shader *shader, + int rstart, int num, const char *name) +{ + struct ir_attribute *a = ir_alloc(shader, sizeof(struct ir_attribute)); + DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name); + a->name = ir_strdup(shader, name); + a->rstart = rstart; + a->num = num; + assert(shader->attributes_count < ARRAY_SIZE(shader->attributes)); + shader->attributes[shader->attributes_count++] = a; + return a; +} + +struct ir_const * ir_const_create(struct ir_shader *shader, + int cstart, float v0, float v1, float v2, float v3) +{ + struct ir_const *c = ir_alloc(shader, sizeof(struct ir_const)); + DEBUG_MSG("C%d: %f, %f, %f, %f", cstart, v0, v1, v2, v3); + c->val[0] = v0; + c->val[1] = v1; + 
c->val[2] = v2; + c->val[3] = v3; + c->cstart = cstart; + assert(shader->consts_count < ARRAY_SIZE(shader->consts)); + shader->consts[shader->consts_count++] = c; + return c; +} + +struct ir_sampler * ir_sampler_create(struct ir_shader *shader, + int idx, const char *name) +{ + struct ir_sampler *s = ir_alloc(shader, sizeof(struct ir_sampler)); + DEBUG_MSG("CONST(%d): %s", idx, name); + s->name = ir_strdup(shader, name); + s->idx = idx; + assert(shader->samplers_count < ARRAY_SIZE(shader->samplers)); + shader->samplers[shader->samplers_count++] = s; + return s; +} + +struct ir_uniform * ir_uniform_create(struct ir_shader *shader, + int cstart, int num, const char *name) +{ + struct ir_uniform *u = ir_alloc(shader, sizeof(struct ir_uniform)); + DEBUG_MSG("C%d-C%d: %s", cstart, cstart + num - 1, name); + u->name = ir_strdup(shader, name); + u->cstart = cstart; + u->num = num; + assert(shader->uniforms_count < ARRAY_SIZE(shader->uniforms)); + shader->uniforms[shader->uniforms_count++] = u; + return u; +} + +struct ir_varying * ir_varying_create(struct ir_shader *shader, + int rstart, int num, const char *name) +{ + struct ir_varying *v = ir_alloc(shader, sizeof(struct ir_varying)); + DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name); + v->name = ir_strdup(shader, name); + v->rstart = rstart; + v->num = num; + assert(shader->varyings_count < ARRAY_SIZE(shader->varyings)); + shader->varyings[shader->varyings_count++] = v; + return v; +} + + +struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type) +{ + struct ir_cf *cf = ir_alloc(shader, sizeof(struct ir_cf)); + DEBUG_MSG("%d", cf_type); + cf->shader = shader; + cf->cf_type = cf_type; + assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); + shader->cfs[shader->cfs_count++] = cf; + return cf; +} + + +/* + * CF instructions: + */ + +static int cf_emit(struct ir_cf *cf, instr_cf_t *instr) +{ + memset(instr, 0, sizeof(*instr)); + + instr->opc = cf->cf_type; + + switch (cf->cf_type) { + case NOP: 
+ break; + case EXEC: + case EXEC_END: + assert(cf->exec.addr <= 0x1ff); + assert(cf->exec.cnt <= 0x6); + assert(cf->exec.sequence <= 0xfff); + instr->exec.address = cf->exec.addr; + instr->exec.count = cf->exec.cnt; + instr->exec.serialize = cf->exec.sequence; + break; + case ALLOC: + assert(cf->alloc.size <= 0xf); + instr->alloc.size = cf->alloc.size; + switch (cf->alloc.type) { + case SQ_POSITION: + case SQ_PARAMETER_PIXEL: + instr->alloc.buffer_select = cf->alloc.type; + break; + default: + ERROR_MSG("invalid alloc type: %d", cf->alloc.type); + return -1; + } + break; + case COND_EXEC: + case COND_EXEC_END: + case COND_PRED_EXEC: + case COND_PRED_EXEC_END: + case LOOP_START: + case LOOP_END: + case COND_CALL: + case RETURN: + case COND_JMP: + case COND_EXEC_PRED_CLEAN: + case COND_EXEC_PRED_CLEAN_END: + case MARK_VS_FETCH_DONE: + ERROR_MSG("TODO"); + return -1; + } + + return 0; +} + + +struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type) +{ + struct ir_instruction *instr = + ir_alloc(cf->shader, sizeof(struct ir_instruction)); + DEBUG_MSG("%d", instr_type); + instr->shader = cf->shader; + instr->pred = cf->shader->pred; + instr->instr_type = instr_type; + assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); + cf->exec.instrs[cf->exec.instrs_count++] = instr; + return instr; +} + + +/* + * FETCH instructions: + */ + +static int instr_emit_fetch(struct ir_instruction *instr, + uint32_t *dwords, uint32_t idx, + struct ir_shader_info *info) +{ + instr_fetch_t *fetch = (instr_fetch_t *)dwords; + int reg = 0; + struct ir_register *dst_reg = instr->regs[reg++]; + struct ir_register *src_reg = instr->regs[reg++]; + + memset(fetch, 0, sizeof(*fetch)); + + reg_update_stats(dst_reg, info, true); + reg_update_stats(src_reg, info, false); + + fetch->opc = instr->fetch.opc; + + if (instr->fetch.opc == VTX_FETCH) { + instr_fetch_vtx_t *vtx = &fetch->vtx; + + assert(instr->fetch.stride <= 0xff); + assert(instr->fetch.fmt <= 0x3f); + 
assert(instr->fetch.const_idx <= 0x1f); + assert(instr->fetch.const_idx_sel <= 0x3); + + vtx->src_reg = src_reg->num; + vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); + vtx->dst_reg = dst_reg->num; + vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); + vtx->must_be_one = 1; + vtx->const_index = instr->fetch.const_idx; + vtx->const_index_sel = instr->fetch.const_idx_sel; + vtx->format_comp_all = !!instr->fetch.is_signed; + vtx->num_format_all = !instr->fetch.is_normalized; + vtx->format = instr->fetch.fmt; + vtx->stride = instr->fetch.stride; + vtx->offset = instr->fetch.offset; + + if (instr->pred != IR_PRED_NONE) { + vtx->pred_select = 1; + vtx->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0; + } + + /* XXX seems like every FETCH but the first has + * this bit set: + */ + vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; + vtx->reserved0 = (idx > 0) ? 0x2 : 0x3; + } else if (instr->fetch.opc == TEX_FETCH) { + instr_fetch_tex_t *tex = &fetch->tex; + + assert(instr->fetch.const_idx <= 0x1f); + + tex->src_reg = src_reg->num; + tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); + tex->dst_reg = dst_reg->num; + tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); + tex->const_idx = instr->fetch.const_idx; + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = 1; + tex->sample_location = SAMPLE_CENTER; + + if (instr->pred != IR_PRED_NONE) { + tex->pred_select = 1; + tex->pred_condition = (instr->pred == IR_PRED_EQ) ? 
1 : 0; + } + + } else { + ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); + return -1; + } + + return 0; +} + +/* + * ALU instructions: + */ + +static int instr_emit_alu(struct ir_instruction *instr, uint32_t *dwords, + struct ir_shader_info *info) +{ + int reg = 0; + instr_alu_t *alu = (instr_alu_t *)dwords; + struct ir_register *dst_reg = instr->regs[reg++]; + struct ir_register *src1_reg; + struct ir_register *src2_reg; + struct ir_register *src3_reg; + + memset(alu, 0, sizeof(*alu)); + + /* handle instructions w/ 3 src operands: */ + switch (instr->alu.vector_opc) { + case MULADDv: + case CNDEv: + case CNDGTEv: + case CNDGTv: + case DOT2ADDv: + /* note: disassembler lists 3rd src first, ie: + * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) + * which is the reason for this strange ordering. + */ + src3_reg = instr->regs[reg++]; + break; + default: + src3_reg = NULL; + break; + } + + src1_reg = instr->regs[reg++]; + src2_reg = instr->regs[reg++]; + + reg_update_stats(dst_reg, info, true); + reg_update_stats(src1_reg, info, false); + reg_update_stats(src2_reg, info, false); + + assert((dst_reg->flags & ~IR_REG_EXPORT) == 0); + assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); + assert((src1_reg->flags & IR_REG_EXPORT) == 0); + assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); + assert((src2_reg->flags & IR_REG_EXPORT) == 0); + assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); + + if (instr->alu.vector_opc == ~0) { + alu->vector_opc = MAXv; + alu->vector_write_mask = 0; + } else { + alu->vector_opc = instr->alu.vector_opc; + alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); + } + + alu->vector_dest = dst_reg->num; + alu->export_data = !!(dst_reg->flags & IR_REG_EXPORT); + + // TODO predicate case/condition.. 
need to add to parser + + alu->src2_reg = src2_reg->num; + alu->src2_swiz = reg_alu_src_swiz(src2_reg); + alu->src2_reg_negate = !!(src2_reg->flags & IR_REG_NEGATE); + alu->src2_reg_abs = !!(src2_reg->flags & IR_REG_ABS); + alu->src2_sel = !(src2_reg->flags & IR_REG_CONST); + + alu->src1_reg = src1_reg->num; + alu->src1_swiz = reg_alu_src_swiz(src1_reg); + alu->src1_reg_negate = !!(src1_reg->flags & IR_REG_NEGATE); + alu->src1_reg_abs = !!(src1_reg->flags & IR_REG_ABS); + alu->src1_sel = !(src1_reg->flags & IR_REG_CONST); + + alu->vector_clamp = instr->alu.vector_clamp; + alu->scalar_clamp = instr->alu.scalar_clamp; + + if (instr->alu.scalar_opc != ~0) { + struct ir_register *sdst_reg = instr->regs[reg++]; + + reg_update_stats(sdst_reg, info, true); + + assert(sdst_reg->flags == dst_reg->flags); + + if (src3_reg) { + assert(src3_reg == instr->regs[reg++]); + } else { + src3_reg = instr->regs[reg++]; + } + + alu->scalar_dest = sdst_reg->num; + alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); + alu->scalar_opc = instr->alu.scalar_opc; + } else { + /* not sure if this is required, but adreno compiler seems + * to always set scalar opc to MAXs if it is not used: + */ + alu->scalar_opc = MAXs; + } + + if (src3_reg) { + reg_update_stats(src3_reg, info, false); + + alu->src3_reg = src3_reg->num; + alu->src3_swiz = reg_alu_src_swiz(src3_reg); + alu->src3_reg_negate = !!(src3_reg->flags & IR_REG_NEGATE); + alu->src3_reg_abs = !!(src3_reg->flags & IR_REG_ABS); + alu->src3_sel = !(src3_reg->flags & IR_REG_CONST); + } else { + /* not sure if this is required, but adreno compiler seems + * to always set register bank for 3rd src if unused: + */ + alu->src3_sel = 1; + } + + if (instr->pred != IR_PRED_NONE) { + alu->pred_select = (instr->pred == IR_PRED_EQ) ? 
3 : 2; + } + + return 0; +} + +static int instr_emit(struct ir_instruction *instr, uint32_t *dwords, + uint32_t idx, struct ir_shader_info *info) +{ + switch (instr->instr_type) { + case IR_FETCH: return instr_emit_fetch(instr, dwords, idx, info); + case IR_ALU: return instr_emit_alu(instr, dwords, info); + } + return -1; +} + + +struct ir_register * ir_reg_create(struct ir_instruction *instr, + int num, const char *swizzle, int flags) +{ + struct ir_register *reg = + ir_alloc(instr->shader, sizeof(struct ir_register)); + DEBUG_MSG("%x, %d, %s", flags, num, swizzle); + assert(num <= REG_MASK); + reg->flags = flags; + reg->num = num; + reg->swizzle = ir_strdup(instr->shader, swizzle); + assert(instr->regs_count < ARRAY_SIZE(instr->regs)); + instr->regs[instr->regs_count++] = reg; + return reg; +} + +static void reg_update_stats(struct ir_register *reg, + struct ir_shader_info *info, bool dest) +{ + if (!(reg->flags & (IR_REG_CONST|IR_REG_EXPORT))) { + info->max_reg = max(info->max_reg, reg->num); + + if (dest) { + info->regs_written |= (1 << reg->num); + } else if (!(info->regs_written & (1 << reg->num))) { + /* for registers that haven't been written, they must be an + * input register that the thread scheduler (presumably?) 
+ * needs to know about: + */ + info->max_input_reg = max(info->max_input_reg, reg->num); + } + } +} + +static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n) +{ + uint32_t swiz = 0; + int i; + + assert(reg->flags == 0); + assert(reg->swizzle); + + DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); + + for (i = n-1; i >= 0; i--) { + swiz <<= 2; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); + case 'x': swiz |= 0x0; break; + case 'y': swiz |= 0x1; break; + case 'z': swiz |= 0x2; break; + case 'w': swiz |= 0x3; break; + } + } + + return swiz; +} + +static uint32_t reg_fetch_dst_swiz(struct ir_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert(reg->flags == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 3; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); + case 'x': swiz |= 0x0; break; + case 'y': swiz |= 0x1; break; + case 'z': swiz |= 0x2; break; + case 'w': swiz |= 0x3; break; + case '0': swiz |= 0x4; break; + case '1': swiz |= 0x5; break; + case '_': swiz |= 0x7; break; + } + } + } else { + swiz = 0x688; + } + + return swiz; +} + +/* actually, a write-mask */ +static uint32_t reg_alu_dst_swiz(struct ir_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert((reg->flags & ~IR_REG_EXPORT) == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 1; + if (reg->swizzle[i] == "xyzw"[i]) { + swiz |= 0x1; + } else if (reg->swizzle[i] != '_') { + ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); + break; + } + } + } else { + swiz = 0xf; + } + + return swiz; +} + +static uint32_t reg_alu_src_swiz(struct ir_register *reg) +{ + uint32_t swiz = 0; + int i; + + assert((reg->flags & 
IR_REG_EXPORT) == 0); + assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); + + DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); + + if (reg->swizzle) { + for (i = 3; i >= 0; i--) { + swiz <<= 2; + switch (reg->swizzle[i]) { + default: + ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); + case 'x': swiz |= (0x0 - i) & 0x3; break; + case 'y': swiz |= (0x1 - i) & 0x3; break; + case 'z': swiz |= (0x2 - i) & 0x3; break; + case 'w': swiz |= (0x3 - i) & 0x3; break; + } + } + } else { + swiz = 0x0; + } + + return swiz; +} diff --git a/src/gallium/drivers/freedreno/ir.h b/src/gallium/drivers/freedreno/ir.h new file mode 100644 index 00000000000..e8025446b3e --- /dev/null +++ b/src/gallium/drivers/freedreno/ir.h @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IR_H_ +#define IR_H_ + +#include <stdint.h> +#include <stdbool.h> + +#include "instr.h" + +/* low level intermediate representation of an adreno shader program */ + +struct ir_shader; + +struct ir_shader * fd_asm_parse(const char *src); + +struct ir_shader_info { + uint16_t sizedwords; + int8_t max_reg; /* highest GPR # used by shader */ + uint8_t max_input_reg; + uint64_t regs_written; +}; + +struct ir_register { + enum { + IR_REG_CONST = 0x1, + IR_REG_EXPORT = 0x2, + IR_REG_NEGATE = 0x4, + IR_REG_ABS = 0x8, + } flags; + int num; + char *swizzle; +}; + +enum ir_pred { + IR_PRED_NONE = 0, + IR_PRED_EQ = 1, + IR_PRED_NE = 2, +}; + +struct ir_instruction { + struct ir_shader *shader; + enum { + IR_FETCH, + IR_ALU, + } instr_type; + enum ir_pred pred; + int sync; + unsigned regs_count; + struct ir_register *regs[5]; + union { + /* FETCH specific: */ + struct { + instr_fetch_opc_t opc; + unsigned const_idx; + /* maybe vertex fetch specific: */ + unsigned const_idx_sel; + enum sq_surfaceformat fmt; + bool is_signed : 1; + bool is_normalized : 1; + uint32_t stride; + uint32_t offset; + } fetch; + /* ALU specific: */ + struct { + instr_vector_opc_t vector_opc; + instr_scalar_opc_t scalar_opc; + bool vector_clamp : 1; + bool scalar_clamp : 1; + } alu; + }; +}; + +struct ir_cf { + struct ir_shader *shader; + instr_cf_opc_t cf_type; + + union { + /* EXEC/EXEC_END specific: */ + struct { + unsigned instrs_count; + struct ir_instruction *instrs[6]; + uint32_t addr, cnt, sequence; + } exec; + /* ALLOC specific: */ + struct { + instr_alloc_type_t type; /* SQ_POSITION or SQ_PARAMETER_PIXEL */ + int size; + } alloc; + }; +}; + +/* somewhat arbitrary limits.. 
*/ +#define MAX_ATTRIBUTES 32 +#define MAX_CONSTS 32 +#define MAX_SAMPLERS 32 +#define MAX_UNIFORMS 32 +#define MAX_VARYINGS 32 + +struct ir_attribute { + const char *name; + int rstart; /* first register */ + int num; /* number of registers */ +}; + +struct ir_const { + float val[4]; + int cstart; /* first const register */ +}; + +struct ir_sampler { + const char *name; + int idx; +}; + +struct ir_uniform { + const char *name; + int cstart; /* first const register */ + int num; /* number of const registers */ +}; + +struct ir_varying { + const char *name; + int rstart; /* first register */ + int num; /* number of registers */ +}; + +struct ir_shader { + unsigned cfs_count; + struct ir_cf *cfs[0x56]; + uint32_t heap[100 * 4096]; + unsigned heap_idx; + + enum ir_pred pred; /* pred inherited by newly created instrs */ + + /* @ headers: */ + uint32_t attributes_count; + struct ir_attribute *attributes[MAX_ATTRIBUTES]; + + uint32_t consts_count; + struct ir_const *consts[MAX_CONSTS]; + + uint32_t samplers_count; + struct ir_sampler *samplers[MAX_SAMPLERS]; + + uint32_t uniforms_count; + struct ir_uniform *uniforms[MAX_UNIFORMS]; + + uint32_t varyings_count; + struct ir_varying *varyings[MAX_VARYINGS]; + +}; + +struct ir_shader * ir_shader_create(void); +void ir_shader_destroy(struct ir_shader *shader); +void * ir_shader_assemble(struct ir_shader *shader, + struct ir_shader_info *info); + +struct ir_attribute * ir_attribute_create(struct ir_shader *shader, + int rstart, int num, const char *name); +struct ir_const * ir_const_create(struct ir_shader *shader, + int cstart, float v0, float v1, float v2, float v3); +struct ir_sampler * ir_sampler_create(struct ir_shader *shader, + int idx, const char *name); +struct ir_uniform * ir_uniform_create(struct ir_shader *shader, + int cstart, int num, const char *name); +struct ir_varying * ir_varying_create(struct ir_shader *shader, + int rstart, int num, const char *name); + +struct ir_cf * ir_cf_create(struct ir_shader *shader, 
instr_cf_opc_t cf_type); + +struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type); + +struct ir_register * ir_reg_create(struct ir_instruction *instr, + int num, const char *swizzle, int flags); + +/* some helper fxns: */ + +static inline struct ir_cf * +ir_cf_create_alloc(struct ir_shader *shader, instr_alloc_type_t type, int size) +{ + struct ir_cf *cf = ir_cf_create(shader, ALLOC); + if (!cf) + return cf; + cf->alloc.type = type; + cf->alloc.size = size; + return cf; +} +static inline struct ir_instruction * +ir_instr_create_alu(struct ir_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop) +{ + struct ir_instruction *instr = ir_instr_create(cf, IR_ALU); + if (!instr) + return instr; + instr->alu.vector_opc = vop; + instr->alu.scalar_opc = sop; + return instr; +} +static inline struct ir_instruction * +ir_instr_create_vtx_fetch(struct ir_cf *cf, int ci, int cis, + enum sq_surfaceformat fmt, bool is_signed, int stride) +{ + struct ir_instruction *instr = instr = ir_instr_create(cf, IR_FETCH); + instr->fetch.opc = VTX_FETCH; + instr->fetch.const_idx = ci; + instr->fetch.const_idx_sel = cis; + instr->fetch.fmt = fmt; + instr->fetch.is_signed = is_signed; + instr->fetch.stride = stride; + return instr; +} +static inline struct ir_instruction * +ir_instr_create_tex_fetch(struct ir_cf *cf, int ci) +{ + struct ir_instruction *instr = instr = ir_instr_create(cf, IR_FETCH); + instr->fetch.opc = TEX_FETCH; + instr->fetch.const_idx = ci; + return instr; +} + + +#endif /* IR_H_ */ |