aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r600/r700_assembler.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_assembler.h')
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h516
1 files changed, 516 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
new file mode 100644
index 00000000000..73bb8bac55d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ */
+
+#ifndef _R700_ASSEMBLER_H_
+#define _R700_ASSEMBLER_H_
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "r700_chip.h"
+#include "r700_shaderinst.h"
+#include "r700_shader.h"
+
+typedef enum SHADER_PIPE_TYPE
+{
+ SPT_VP = 0,
+ SPT_FP = 1
+} SHADER_PIPE_TYPE;
+
+typedef enum ConstantCycles
+{
+ NUMBER_OF_CYCLES = 3,
+ NUMBER_OF_COMPONENTS = 4
+} ConstantCycles;
+
+typedef enum HARDWARE_LIMIT_VALUES
+{
+ TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
+ MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE,
+ MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE,
+ CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE,
+ NUMBER_OF_INPUT_COLORS = 2,
+ NUMBER_OF_OUTPUT_COLORS = 8,
+ NUMBER_OF_TEXTURE_UNITS = 16,
+ MEGA_FETCH_BYTES = 32
+} HARDWARE_LIMIT_VALUES;
+
+typedef enum AddressMode
+{
+ ADDR_ABSOLUTE = 0,
+ ADDR_RELATIVE_A0 = 1,
+ ADDR_RELATIVE_FLI_0 = 2,
+ NUMBER_OF_ADDR_MOD = 3
+} AddressMode;
+
+typedef enum SrcRegisterType
+{
+ SRC_REG_TEMPORARY = 0,
+ SRC_REG_INPUT = 1,
+ SRC_REG_CONSTANT = 2,
+ SRC_REG_ALT_TEMPORARY = 3,
+ NUMBER_OF_SRC_REG_TYPE = 4
+} SrcRegisterType;
+
+typedef enum DstRegisterType
+{
+ DST_REG_TEMPORARY = 0,
+ DST_REG_A0 = 1,
+ DST_REG_OUT = 2,
+ DST_REG_OUT_X_REPL = 3,
+ DST_REG_ALT_TEMPORARY = 4,
+ DST_REG_INPUT = 5,
+ NUMBER_OF_DST_REG_TYPE = 6
+} DstRegisterType;
+
+typedef unsigned int BITS;
+
+typedef struct PVSDSTtag
+{
+ BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2
+ BITS math:1;
+ BITS predicated:1; //10 //8
+ BITS pred_inv :1; //11 //8
+
+ BITS rtype:3;
+ BITS reg:10; //24 //20
+
+ BITS writex:1;
+ BITS writey:1;
+ BITS writez:1;
+ BITS writew:1; //28
+
+ BITS op3:1; // 29 Represents *_OP3_* ALU opcode
+
+ BITS dualop:1; // 30 //26
+
+ BITS addrmode0:1; //31 //29
+ BITS addrmode1:1; //32
+} PVSDST;
+
+typedef struct PVSSRCtag
+{
+ BITS rtype:4;
+ BITS addrmode0:1;
+ BITS reg:10; //15 (8)
+ BITS swizzlex:3;
+ BITS swizzley:3;
+ BITS swizzlez:3;
+ BITS swizzlew:3; //27
+
+ BITS negx:1;
+ BITS negy:1;
+ BITS negz:1;
+ BITS negw:1; //31
+ //BITS addrsel:2;
+ BITS addrmode1:1; //32
+} PVSSRC;
+
+typedef struct PVSMATHtag
+{
+ BITS rtype:4;
+ BITS spare:1;
+ BITS reg:8;
+ BITS swizzlex:3;
+ BITS swizzley:3;
+ BITS dstoff:2; // 2 bits of dest offset into alt ram
+ BITS opcode:4;
+ BITS negx:1;
+ BITS negy:1;
+ BITS dstcomp:2; // select dest component
+ BITS spare2:3;
+} PVSMATH;
+
+typedef union PVSDWORDtag
+{
+ BITS bits;
+ PVSDST dst;
+ PVSSRC src;
+ PVSMATH math;
+ float f;
+} PVSDWORD;
+
+typedef struct VAP_OUT_VTX_FMT_0tag
+{
+ BITS pos:1; // 0
+ BITS misc:1;
+ BITS clip_dist0:1;
+ BITS clip_dist1:1;
+ BITS pos_param:1; // 4
+
+ BITS color0:1; // 5
+ BITS color1:1;
+ BITS color2:1;
+ BITS color3:1;
+ BITS color4:1;
+ BITS color5:1;
+ BITS color6:1;
+ BITS color7:1;
+
+ BITS normal:1;
+
+ BITS depth:1; // 14
+
+ BITS point_size:1; // 15
+ BITS edge_flag:1;
+ BITS rta_index:1; // shares same channel as kill_flag
+ BITS kill_flag:1;
+ BITS viewport_index:1; // 19
+
+ BITS resvd1:12; // 20
+} VAP_OUT_VTX_FMT_0;
+
+typedef struct VAP_OUT_VTX_FMT_1tag
+{
+ BITS tex0comp:3;
+ BITS tex1comp:3;
+ BITS tex2comp:3;
+ BITS tex3comp:3;
+ BITS tex4comp:3;
+ BITS tex5comp:3;
+ BITS tex6comp:3;
+ BITS tex7comp:3;
+
+ BITS resvd:8;
+} VAP_OUT_VTX_FMT_1;
+
+typedef struct VAP_OUT_VTX_FMT_2tag
+{
+ BITS tex8comp :3;
+ BITS tex9comp :3;
+ BITS tex10comp:3;
+ BITS tex11comp:3;
+ BITS tex12comp:3;
+ BITS tex13comp:3;
+ BITS tex14comp:3;
+ BITS tex15comp:3;
+
+ BITS resvd:8;
+} VAP_OUT_VTX_FMT_2;
+
+typedef struct OUT_FRAGMENT_FMT_0tag
+{
+ BITS color0:1;
+ BITS color1:1;
+ BITS color2:1;
+ BITS color3:1;
+ BITS color4:1;
+ BITS color5:1;
+ BITS color6:1;
+ BITS color7:1;
+
+ BITS depth:1;
+ BITS stencil_ref:1;
+ BITS coverage_to_mask:1;
+ BITS mask:1;
+
+ BITS resvd1:20;
+} OUT_FRAGMENT_FMT_0;
+
+typedef enum CF_CLAUSE_TYPE
+{
+ CF_EXPORT_CLAUSE,
+ CF_ALU_CLAUSE,
+ CF_TEX_CLAUSE,
+ CF_VTX_CLAUSE,
+ CF_OTHER_CLAUSE,
+ CF_EMPTY_CLAUSE,
+ NUMBER_CF_CLAUSE_TYPES
+} CF_CLAUSE_TYPE;
+
+enum
+{
+ MAX_BOOL_CONSTANTS = 32,
+ MAX_INT_CONSTANTS = 32,
+ MAX_FLOAT_CONSTANTS = 256,
+
+ FC_NONE = 0,
+ FC_IF = 1,
+ FC_LOOP = 2,
+ FC_REP = 3,
+
+ COND_NONE = 0,
+ COND_BOOL = 1,
+ COND_PRED = 2,
+ COND_ALU = 3,
+
+ SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
+ SAFEDIST_ALU = 6 ///< the same for alu->fc
+};
+
+typedef struct FC_LEVEL
+{
+ unsigned int first; ///< first fc instruction on level (if, rep, loop)
+ unsigned int* mid; ///< middle instructions - else or all breaks on this level
+ unsigned int midLen;
+ unsigned int type;
+ unsigned int cond;
+ unsigned int inv;
+ unsigned int bpush; ///< 1 if first instruction does branch stack push
+ int id; ///< id of bool or int variable
+} FC_LEVEL;
+
+typedef struct VTX_FETCH_METHOD
+{
+ GLboolean bEnableMini;
+ GLuint mega_fetch_remainder;
+} VTX_FETCH_METHOD;
+
+typedef struct r700_AssemblerBase
+{
+ R700ControlFlowSXClause* cf_last_export_ptr;
+ R700ControlFlowSXClause* cf_current_export_clause_ptr;
+ R700ControlFlowALUClause* cf_current_alu_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
+ R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
+
+ //Result shader
+ R700_Shader * pR700Shader;
+
+ // No clause has been created yet
+ CF_CLAUSE_TYPE cf_current_clause_type;
+
+ GLuint number_of_exports;
+ GLuint number_of_colorandz_exports;
+ GLuint number_of_export_opcodes;
+
+ PVSDWORD D;
+ PVSDWORD S[3];
+
+ unsigned int uLastPosUpdate;
+
+ OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
+
+ unsigned int uIIns;
+ unsigned int uOIns;
+ unsigned int number_used_registers;
+ unsigned int uUsedConsts;
+
+ // Fragment programs
+ unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
+ unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
+ unsigned int uBoolConsts;
+ unsigned int uIntConsts;
+ unsigned int uInsts;
+ unsigned int uConsts;
+
+ // Vertex programs
+ unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
+ unsigned char ucVP_OutputMap[VERT_RESULT_MAX];
+
+ unsigned char * pucOutMask;
+
+ //-----------------------------------------------------------------------------------
+ // flow control members
+ //-----------------------------------------------------------------------------------
+ unsigned int FCSP;
+ FC_LEVEL fc_stack[32];
+
+ unsigned int branch_depth;
+ unsigned int max_branch_depth;
+
+ //-----------------------------------------------------------------------------------
+ // ArgSubst used in Assemble_Source() function
+ //-----------------------------------------------------------------------------------
+ int aArgSubst[4];
+
+ GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
+ GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
+ GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
+
+ GLuint uOutputs;
+
+ GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
+ GLint depth_export_register_number;
+
+ GLint stencil_export_register_number;
+ GLint coverage_to_mask_export_register_number;
+ GLint mask_export_register_number;
+
+ GLuint starting_export_register_number;
+ GLuint starting_vfetch_register_number;
+ GLuint starting_temp_register_number;
+ GLuint uHelpReg;
+ GLuint uFirstHelpReg;
+
+ GLboolean input_position_is_used;
+ GLboolean input_normal_is_used;
+
+ GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
+
+ GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
+
+ R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
+
+ GLuint number_of_inputs;
+
+ InstDeps *pInstDeps;
+
+ SHADER_PIPE_TYPE currentShaderType;
+ struct prog_instruction * pILInst;
+ GLuint uiCurInst;
+ GLboolean bR6xx;
+ /* helper to decide which type of instruction to assemble */
+ GLboolean is_tex;
+ /* we inserted helper intructions and need barrier on next TEX ins */
+ GLboolean need_tex_barrier;
+} r700_AssemblerBase;
+
+//Internal use
+BITS addrmode_PVSDST(PVSDST * pPVSDST);
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode);
+void nomask_PVSDST(PVSDST * pPVSDST);
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode);
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
+void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
+void neg_PVSSRC(PVSSRC* pPVSSRC);
+void noneg_PVSSRC(PVSSRC* pPVSSRC);
+void flipneg_PVSSRC(PVSSRC* pPVSSRC);
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
+GLboolean is_reduction_opcode(PVSDWORD * dest);
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
+
+unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+
+GLboolean IsTex(gl_inst_opcode Opcode);
+GLboolean IsAlu(gl_inst_opcode Opcode);
+int check_current_clause(r700_AssemblerBase* pAsm,
+ CF_CLAUSE_TYPE new_clause_type);
+GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
+ R700VertexInstruction* vertex_instruction_ptr);
+GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
+ R700TextureInstruction* tex_instruction_ptr);
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+ GLuint gl_client_id,
+ GLuint destination_register,
+ GLuint number_of_elements,
+ GLenum dataElementType,
+ VTX_FETCH_METHOD* pFetchMethod);
+GLuint gethelpr(r700_AssemblerBase* pAsm);
+void resethelpr(r700_AssemblerBase* pAsm);
+void checkop_init(r700_AssemblerBase* pAsm);
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
+GLboolean checkop1(r700_AssemblerBase* pAsm);
+GLboolean checkop2(r700_AssemblerBase* pAsm);
+GLboolean checkop3(r700_AssemblerBase* pAsm);
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+ int src,
+ int fld);
+GLboolean assemble_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_src(r700_AssemblerBase *pAsm);
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);
+void initialize(r700_AssemblerBase *pAsm);
+GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ PVSSRC* pSource,
+ BITS scalar_channel_index);
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr,
+ GLuint contiguous_slots_needed);
+void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
+ int source_index,
+ BITS* psrc_sel,
+ BITS* psrc_rel,
+ BITS* psrc_chan,
+ BITS* psrc_neg);
+int is_cfile(BITS sel);
+int is_const(BITS sel);
+int is_gpr(BITS sel);
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
+ GLuint sel,
+ GLuint chan);
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr);
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+ R700ALUInstruction* alu_instruction_ptr);
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
+GLboolean next_ins(r700_AssemblerBase *pAsm);
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_BAD(char *opcode_str);
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
+GLboolean assemble_COS(r700_AssemblerBase *pAsm);
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
+GLboolean assemble_DST(r700_AssemblerBase *pAsm);
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
+GLboolean assemble_POW(r700_AssemblerBase *pAsm);
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
+GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
+GLboolean assemble_STP(r700_AssemblerBase *pAsm);
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+ GLuint type,
+ GLuint export_starting_index,
+ GLuint export_count,
+ GLuint starting_register_number,
+ GLboolean is_depth_export);
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
+ BITS depth_channel_select);
+
+
+//Interface
+GLboolean AssembleInstr(GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ r700_AssemblerBase *pR700AsmCode);
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+
+int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
+
+#endif //_R700_ASSEMBLER_H_