aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/sb/sb_bc.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/sb/sb_bc.h')
-rw-r--r--src/gallium/drivers/r600/sb/sb_bc.h785
1 files changed, 785 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
new file mode 100644
index 00000000000..be68f9fd925
--- /dev/null
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -0,0 +1,785 @@
+/*
+ * Copyright 2013 Vadim Girlin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Vadim Girlin
+ */
+
+#ifndef SB_BC_H_
+#define SB_BC_H_
+
+extern "C" {
+#include <stdint.h>
+#include "r600_isa.h"
+}
+
+#include <vector>
+#include <stack>
+
+struct r600_bytecode;
+struct r600_shader;
+
+namespace r600_sb {
+
+class hw_encoding_format;
+class alu_node;
+class cf_node;
+class fetch_node;
+class alu_group_node;
+class region_node;
+class shader;
+
+enum shader_target
+{
+ TARGET_UNKNOWN,
+ TARGET_VS,
+ TARGET_PS,
+ TARGET_GS,
+ TARGET_COMPUTE,
+ TARGET_FETCH,
+
+ TARGET_NUM
+};
+
+enum sb_hw_class_bits
+{
+ HB_R6 = (1<<0),
+ HB_R7 = (1<<1),
+ HB_EG = (1<<2),
+ HB_CM = (1<<3),
+
+ HB_R6R7 = (HB_R6 | HB_R7),
+ HB_EGCM = (HB_EG | HB_CM),
+ HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
+ HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),
+
+ HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
+};
+
+enum sb_hw_chip
+{
+ HW_CHIP_UNKNOWN,
+ HW_CHIP_R600,
+ HW_CHIP_RV610,
+ HW_CHIP_RV630,
+ HW_CHIP_RV670,
+ HW_CHIP_RV620,
+ HW_CHIP_RV635,
+ HW_CHIP_RS780,
+ HW_CHIP_RS880,
+ HW_CHIP_RV770,
+ HW_CHIP_RV730,
+ HW_CHIP_RV710,
+ HW_CHIP_RV740,
+ HW_CHIP_CEDAR,
+ HW_CHIP_REDWOOD,
+ HW_CHIP_JUNIPER,
+ HW_CHIP_CYPRESS,
+ HW_CHIP_HEMLOCK,
+ HW_CHIP_PALM,
+ HW_CHIP_SUMO,
+ HW_CHIP_SUMO2,
+ HW_CHIP_BARTS,
+ HW_CHIP_TURKS,
+ HW_CHIP_CAICOS,
+ HW_CHIP_CAYMAN,
+ HW_CHIP_ARUBA
+};
+
+enum sb_hw_class
+{
+ HW_CLASS_UNKNOWN,
+ HW_CLASS_R600,
+ HW_CLASS_R700,
+ HW_CLASS_EVERGREEN,
+ HW_CLASS_CAYMAN
+};
+
+enum alu_slots {
+ SLOT_X = 0,
+ SLOT_Y = 1,
+ SLOT_Z = 2,
+ SLOT_W = 3,
+ SLOT_TRANS = 4
+};
+
+enum misc_consts {
+ MAX_ALU_LITERALS = 4,
+ MAX_ALU_SLOTS = 128,
+ MAX_GPR = 128,
+ MAX_CHAN = 4
+
+};
+
+enum alu_src_sel {
+
+ ALU_SRC_LDS_OQ_A = 219,
+ ALU_SRC_LDS_OQ_B = 220,
+ ALU_SRC_LDS_OQ_A_POP = 221,
+ ALU_SRC_LDS_OQ_B_POP = 222,
+ ALU_SRC_LDS_DIRECT_A = 223,
+ ALU_SRC_LDS_DIRECT_B = 224,
+ ALU_SRC_TIME_HI = 227,
+ ALU_SRC_TIME_LO = 228,
+ ALU_SRC_MASK_HI = 229,
+ ALU_SRC_MASK_LO = 230,
+ ALU_SRC_HW_WAVE_ID = 231,
+ ALU_SRC_SIMD_ID = 232,
+ ALU_SRC_SE_ID = 233,
+ ALU_SRC_HW_THREADGRP_ID = 234,
+ ALU_SRC_WAVE_ID_IN_GRP = 235,
+ ALU_SRC_NUM_THREADGRP_WAVES = 236,
+ ALU_SRC_HW_ALU_ODD = 237,
+ ALU_SRC_LOOP_IDX = 238,
+ ALU_SRC_PARAM_BASE_ADDR = 240,
+ ALU_SRC_NEW_PRIM_MASK = 241,
+ ALU_SRC_PRIM_MASK_HI = 242,
+ ALU_SRC_PRIM_MASK_LO = 243,
+ ALU_SRC_1_DBL_L = 244,
+ ALU_SRC_1_DBL_M = 245,
+ ALU_SRC_0_5_DBL_L = 246,
+ ALU_SRC_0_5_DBL_M = 247,
+ ALU_SRC_0 = 248,
+ ALU_SRC_1 = 249,
+ ALU_SRC_1_INT = 250,
+ ALU_SRC_M_1_INT = 251,
+ ALU_SRC_0_5 = 252,
+ ALU_SRC_LITERAL = 253,
+ ALU_SRC_PV = 254,
+ ALU_SRC_PS = 255,
+
+ ALU_SRC_PARAM_OFFSET = 448
+};
+
+enum alu_predicate_select
+{
+ PRED_SEL_OFF = 0,
+// RESERVED = 1,
+ PRED_SEL_0 = 2,
+ PRED_SEL_1 = 3
+};
+
+
+enum alu_omod {
+ OMOD_OFF = 0,
+ OMOD_M2 = 1,
+ OMOD_M4 = 2,
+ OMOD_D2 = 3
+};
+
+enum alu_index_mode {
+ INDEX_AR_X = 0,
+ INDEX_AR_Y_R600 = 1,
+ INDEX_AR_Z_R600 = 2,
+ INDEX_AR_W_R600 = 3,
+
+ INDEX_LOOP = 4,
+ INDEX_GLOBAL = 5,
+ INDEX_GLOBAL_AR_X = 6
+};
+
+enum alu_cayman_mova_dst {
+ CM_MOVADST_AR_X,
+ CM_MOVADST_PC,
+ CM_MOVADST_IDX0,
+ CM_MOVADST_IDX1,
+ CM_MOVADST_CG0, // clause-global byte 0
+ CM_MOVADST_CG1,
+ CM_MOVADST_CG2,
+ CM_MOVADST_CG3
+};
+
+enum alu_cayman_exec_mask_op {
+ CM_EMO_DEACTIVATE,
+ CM_EMO_BREAK,
+ CM_EMO_CONTINUE,
+ CM_EMO_KILL
+};
+
+
+enum cf_exp_type {
+ EXP_PIXEL,
+ EXP_POS,
+ EXP_PARAM,
+
+ EXP_TYPE_COUNT
+};
+
+enum cf_mem_type {
+ MEM_WRITE,
+ MEM_WRITE_IND,
+ MEM_WRITE_ACK,
+ MEM_WRITE_IND_ACK
+};
+
+
+enum alu_kcache_mode {
+ KC_LOCK_NONE,
+ KC_LOCK_1,
+ KC_LOCK_2,
+ KC_LOCK_LOOP
+};
+
+enum alu_kcache_index_mode {
+ KC_INDEX_NONE,
+ KC_INDEX_0,
+ KC_INDEX_1,
+ KC_INDEX_INVALID
+};
+
+enum chan_select {
+ SEL_X = 0,
+ SEL_Y = 1,
+ SEL_Z = 2,
+ SEL_W = 3,
+ SEL_0 = 4,
+ SEL_1 = 5,
+// RESERVED = 6,
+ SEL_MASK = 7
+};
+
+enum bank_swizzle {
+ VEC_012 = 0,
+ VEC_021 = 1,
+ VEC_120 = 2,
+ VEC_102 = 3,
+ VEC_201 = 4,
+ VEC_210 = 5,
+
+ VEC_NUM = 6,
+
+ SCL_210 = 0,
+ SCL_122 = 1,
+ SCL_212 = 2,
+ SCL_221 = 3,
+
+ SCL_NUM = 4
+
+};
+
+enum sched_queue_id {
+ SQ_CF,
+ SQ_ALU,
+ SQ_TEX,
+ SQ_VTX,
+
+ SQ_NUM
+};
+
+struct literal {
+ union {
+ int32_t i;
+ uint32_t u;
+ float f;
+ };
+
+ literal(int32_t i = 0) : i(i) {}
+ literal(uint32_t u) : u(u) {}
+ literal(float f) : f(f) {}
+ literal(double f) : f(f) {}
+ operator uint32_t() const { return u; }
+ bool operator ==(literal l) { return u == l.u; }
+ bool operator ==(int v_int) { return i == v_int; }
+ bool operator ==(unsigned v_uns) { return u == v_uns; }
+};
+
+struct bc_kcache {
+ unsigned mode;
+ unsigned bank;
+ unsigned addr;
+ unsigned index_mode;
+} ;
+
+// TODO optimize bc structures
+
+struct bc_cf {
+
+ bc_kcache kc[4];
+
+ unsigned id;
+
+
+ const cf_op_info * op_ptr;
+ unsigned op;
+
+ unsigned addr:32;
+
+ unsigned alt_const:1;
+ unsigned uses_waterfall:1;
+
+ unsigned barrier:1;
+ unsigned count:7;
+ unsigned pop_count:3;
+ unsigned call_count:6;
+ unsigned whole_quad_mode:1;
+ unsigned valid_pixel_mode:1;
+
+ unsigned jumptable_sel:3;
+ unsigned cf_const:5;
+ unsigned cond:2;
+ unsigned end_of_program:1;
+
+ unsigned array_base:13;
+ unsigned elem_size:2;
+ unsigned index_gpr:7;
+ unsigned rw_gpr:7;
+ unsigned rw_rel:1;
+ unsigned type:2;
+
+ unsigned burst_count:4;
+ unsigned mark:1;
+ unsigned sel[4];
+
+ unsigned array_size:12;
+ unsigned comp_mask:4;
+
+ unsigned rat_id:4;
+ unsigned rat_inst:6;
+ unsigned rat_index_mode:2;
+
+ void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
+
+ bool is_alu_extended() {
+ assert(op_ptr->flags & CF_ALU);
+ return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
+ }
+
+};
+
+struct bc_alu_src {
+ unsigned sel:9;
+ unsigned chan:2;
+ unsigned neg:1;
+ unsigned abs:1;
+ unsigned rel:1;
+ literal value;
+};
+
+struct bc_alu {
+ const alu_op_info * op_ptr;
+ unsigned op;
+
+ bc_alu_src src[3];
+
+ unsigned dst_gpr:7;
+ unsigned dst_chan:2;
+ unsigned dst_rel:1;
+ unsigned clamp:1;
+ unsigned omod:2;
+ unsigned bank_swizzle:3;
+
+ unsigned index_mode:3;
+ unsigned last:1;
+ unsigned pred_sel:2;
+
+ unsigned fog_merge:1;
+ unsigned write_mask:1;
+ unsigned update_exec_mask:1;
+ unsigned update_pred:1;
+
+ unsigned slot:3;
+
+ alu_op_flags slot_flags;
+
+ void set_op(unsigned op) {
+ this->op = op;
+ op_ptr = r600_isa_alu(op);
+ }
+};
+
+struct bc_fetch {
+ const fetch_op_info * op_ptr;
+ unsigned op;
+
+ unsigned bc_frac_mode:1;
+ unsigned fetch_whole_quad:1;
+ unsigned resource_id:8;
+
+ unsigned src_gpr:7;
+ unsigned src_rel:1;
+ unsigned src_sel[4];
+
+ unsigned dst_gpr:7;
+ unsigned dst_rel:1;
+ unsigned dst_sel[4];
+
+ unsigned alt_const:1;
+
+ unsigned inst_mod:2;
+ unsigned resource_index_mode:2;
+ unsigned sampler_index_mode:2;
+
+ unsigned coord_type[4];
+ unsigned lod_bias:7;
+
+ unsigned offset[3];
+
+ unsigned sampler_id:5;
+
+
+ unsigned fetch_type:2;
+ unsigned mega_fetch_count:6;
+ unsigned coalesced_read:1;
+ unsigned structured_read:2;
+ unsigned lds_req:1;
+
+ unsigned data_format:6;
+ unsigned format_comp_all:1;
+ unsigned num_format_all:2;
+ unsigned semantic_id:8;
+ unsigned srf_mode_all:1;
+ unsigned use_const_fields:1;
+
+ unsigned const_buf_no_stride:1;
+ unsigned endian_swap:2;
+ unsigned mega_fetch:1;
+
+ void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
+};
+
+class sb_context {
+
+public:
+
+ r600_isa *isa;
+
+ sb_hw_chip hw_chip;
+ sb_hw_class hw_class;
+
+ unsigned alu_temp_gprs;
+ unsigned max_fetch;
+ bool has_trans;
+ unsigned vtx_src_num;
+ unsigned num_slots;
+ bool uses_mova_gpr;
+
+ unsigned stack_entry_size;
+
+ static unsigned dump_pass;
+ static unsigned dump_stat;
+
+ static unsigned dry_run;
+
+ static unsigned dskip_start;
+ static unsigned dskip_end;
+ static unsigned dskip_mode;
+
+ sb_context()
+ : isa(0), hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
+
+ int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
+
+ bool is_r600() {return hw_class == HW_CLASS_R600;}
+ bool is_r700() {return hw_class == HW_CLASS_R700;}
+ bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
+ bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
+ bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
+
+ sb_hw_class_bits hw_class_bit() {
+ switch (hw_class) {
+ case HW_CLASS_R600:return HB_R6;
+ case HW_CLASS_R700:return HB_R7;
+ case HW_CLASS_EVERGREEN:return HB_EG;
+ case HW_CLASS_CAYMAN:return HB_CM;
+ default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
+
+ }
+ }
+
+ unsigned cf_opcode(unsigned op) {
+ return r600_isa_cf_opcode(isa->hw_class, op);
+ }
+
+ unsigned alu_opcode(unsigned op) {
+ return r600_isa_alu_opcode(isa->hw_class, op);
+ }
+
+ unsigned alu_slots(unsigned op) {
+ return r600_isa_alu_slots(isa->hw_class, op);
+ }
+
+ unsigned alu_slots(const alu_op_info * op_ptr) {
+ return op_ptr->slots[isa->hw_class];
+ }
+
+ unsigned alu_slots_mask(const alu_op_info * op_ptr) {
+ unsigned mask = 0;
+ unsigned slot_flags = alu_slots(op_ptr);
+ if (slot_flags & AF_V)
+ mask = 0b01111;
+ if (!is_cayman() && (slot_flags & AF_S))
+ mask |= 0b10000;
+ return mask;
+ }
+
+ unsigned fetch_opcode(unsigned op) {
+ return r600_isa_fetch_opcode(isa->hw_class, op);
+ }
+
+ bool is_kcache_sel(unsigned sel) {
+ return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
+ }
+
+};
+
+#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
+#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
+
+class bc_decoder {
+
+ sb_context &ctx;
+
+ uint32_t* dw;
+ unsigned ndw;
+
+public:
+
+ bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
+ : ctx(sctx), dw(data), ndw(size) {}
+
+ int decode_cf(unsigned &i, bc_cf &bc);
+ int decode_alu(unsigned &i, bc_alu &bc);
+ int decode_fetch(unsigned &i, bc_fetch &bc);
+
+private:
+ int decode_cf_alu(unsigned &i, bc_cf &bc);
+ int decode_cf_exp(unsigned &i, bc_cf &bc);
+ int decode_cf_mem(unsigned &i, bc_cf &bc);
+
+ int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
+};
+
+// bytecode format definition
+
+class hw_encoding_format {
+ const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
+ hw_encoding_format();
+protected:
+ uint32_t value;
+public:
+ hw_encoding_format(sb_hw_class_bits hw)
+ : hw_target(hw), value(0) {}
+ hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
+ : hw_target(hw), value(v) {}
+ uint32_t get_value(sb_hw_class_bits hw) const {
+ assert((hw & hw_target) == hw);
+ return value;
+ }
+};
+
+#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
+class fmt##_##hwset : public hw_encoding_format {\
+ typedef fmt##_##hwset thistype; \
+public: \
+ fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
+ fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
+
+#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
+
+#define BC_FORMAT_END(fmt) };
+
+// bytecode format field definition
+
+#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
+ thistype & name(unsigned v) { \
+ value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
+ return *this; \
+ } \
+ unsigned get_##name() const { \
+ return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
+ } \
+
+#define BC_RSRVD(fmt, last_bit, first_bit)
+
+// CLAMP macro defined elsewhere interferes with bytecode field name
+#undef CLAMP
+
+#include "sb_bc_fmt_def.inc"
+
+#undef BC_FORMAT_BEGIN
+#undef BC_FORMAT_END
+#undef BC_FIELD
+#undef BC_RSRVD
+
+class bc_parser {
+ sb_context & ctx;
+
+ bc_decoder *dec;
+
+ r600_bytecode *bc;
+ r600_shader *pshader;
+
+ uint32_t *dw;
+ unsigned bc_ndw;
+
+ unsigned max_cf;
+
+ shader *sh;
+
+ int error;
+
+ alu_node *slots[2][5];
+ unsigned cgroup;
+
+ typedef std::vector<cf_node*> id_cf_map;
+ id_cf_map cf_map;
+
+ typedef std::stack<region_node*> region_stack;
+ region_stack loop_stack;
+
+ int enable_dump;
+ int optimize;
+
+public:
+
+ bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader,
+ int dump_source, int optimize) :
+ ctx(sctx), dec(), bc(bc), pshader(pshader),
+ dw(), bc_ndw(), max_cf(),
+ sh(), error(), slots(), cgroup(),
+ cf_map(), loop_stack(), enable_dump(dump_source),
+ optimize(optimize) { }
+
+ int parse();
+
+ shader* get_shader() { assert(!error); return sh; }
+
+private:
+
+ int parse_shader();
+
+ int parse_decls();
+
+ int parse_cf(unsigned &i, bool &eop);
+
+ int parse_alu_clause(cf_node *cf);
+ int parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
+
+ int parse_fetch_clause(cf_node *cf);
+
+ int prepare_ir();
+ int prepare_loop(cf_node *c);
+ int prepare_if(cf_node *c);
+ int prepare_alu_clause(cf_node *c);
+
+};
+
+
+
+
+class bytecode {
+ typedef std::vector<uint32_t> bc_vector;
+ sb_hw_class_bits hw_class_bit;
+
+ bc_vector bc;
+
+ unsigned pos;
+
+public:
+
+ bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
+ : hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
+
+ unsigned ndw() { return bc.size(); }
+
+ void write_data(uint32_t* dst) {
+ memcpy(dst, bc.data(), 4 * bc.size());
+ }
+
+ void align(unsigned a) {
+ unsigned size = bc.size();
+ size = (size + a - 1) & ~(a-1);
+ bc.resize(size);
+ }
+
+ void set_size(unsigned sz) {
+ assert(sz >= bc.size());
+ bc.resize(sz);
+ }
+
+ void seek(unsigned p) {
+ if (p != pos) {
+ if (p > bc.size()) {
+ bc.resize(p);
+ }
+ pos = p;
+ }
+ }
+
+ unsigned get_pos() { return pos; }
+ uint32_t *data() { return bc.data(); }
+
+ bytecode & operator <<(uint32_t v) {
+ if (pos == ndw()) {
+ bc.push_back(v);
+ } else
+ bc.at(pos) = v;
+ ++pos;
+ return *this;
+ }
+
+ bytecode & operator <<(const hw_encoding_format &e) {
+ *this << e.get_value(hw_class_bit);
+ return *this;
+ }
+
+ bytecode & operator <<(const bytecode &b) {
+ bc.insert(bc.end(), b.bc.begin(), b.bc.end());
+ return *this;
+ }
+
+ uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
+};
+
+
+class bc_builder {
+ shader &sh;
+ sb_context &ctx;
+ bytecode bb;
+ int error;
+
+public:
+
+ bc_builder(shader &s);
+ int build();
+ bytecode& get_bytecode() { assert(!error); return bb; }
+
+private:
+
+ int build_cf(cf_node *n);
+
+ int build_cf_alu(cf_node *n);
+ int build_cf_mem(cf_node *n);
+ int build_cf_exp(cf_node *n);
+
+ int build_alu_clause(cf_node *n);
+ int build_alu_group(alu_group_node *n);
+ int build_alu(alu_node *n);
+
+ int build_fetch_clause(cf_node *n);
+ int build_fetch_tex(fetch_node *n);
+ int build_fetch_vtx(fetch_node *n);
+};
+
+} // namespace r600_sb
+
+#endif /* SB_BC_H_ */