/* * Copyright © 2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef BRW_VEC4_H #define BRW_VEC4_H #include #include "brw_shader.h" #include "main/compiler.h" #include "program/hash_table.h" extern "C" { #include "brw_vs.h" #include "brw_context.h" #include "brw_eu.h" }; #include "glsl/ir.h" namespace brw { class dst_reg; unsigned swizzle_for_size(int size); class reg { public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; /** virtual register number. 0 = fixed hw reg */ int reg; /** Offset within the virtual register. */ int reg_offset; /** Register type. BRW_REGISTER_TYPE_* */ int type; struct brw_reg fixed_hw_reg; /** Value for file == BRW_IMMMEDIATE_FILE */ union { int32_t i; uint32_t u; float f; } imm; }; class src_reg : public reg { public: /* Callers of this ralloc-based new need not call delete. It's * easier to just ralloc_free 'ctx' (or any of its ancestors). */ static void* operator new(size_t size, void *ctx) { void *node; node = ralloc_size(ctx, size); assert(node != NULL); return node; } void init(); src_reg(register_file file, int reg, const glsl_type *type); src_reg(); src_reg(float f); src_reg(uint32_t u); src_reg(int32_t i); bool equals(src_reg *r); bool is_zero() const; bool is_one() const; src_reg(class vec4_visitor *v, const struct glsl_type *type); explicit src_reg(dst_reg reg); GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ bool negate; bool abs; src_reg *reladdr; }; class dst_reg : public reg { public: /* Callers of this ralloc-based new need not call delete. It's * easier to just ralloc_free 'ctx' (or any of its ancestors). */ static void* operator new(size_t size, void *ctx) { void *node; node = ralloc_size(ctx, size); assert(node != NULL); return node; } void init(); dst_reg(); dst_reg(register_file file, int reg); dst_reg(register_file file, int reg, const glsl_type *type, int writemask); dst_reg(struct brw_reg reg); dst_reg(class vec4_visitor *v, const struct glsl_type *type); explicit dst_reg(src_reg reg); int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ src_reg *reladdr; }; class vec4_instruction : public backend_instruction { public: /* Callers of this ralloc-based new need not call delete. It's * easier to just ralloc_free 'ctx' (or any of its ancestors). */ static void* operator new(size_t size, void *ctx) { void *node; node = rzalloc_size(ctx, size); assert(node != NULL); return node; } vec4_instruction(vec4_visitor *v, enum opcode opcode, dst_reg dst = dst_reg(), src_reg src0 = src_reg(), src_reg src1 = src_reg(), src_reg src2 = src_reg()); struct brw_reg get_dst(void); struct brw_reg get_src(int i); dst_reg dst; src_reg src[3]; bool saturate; bool force_writemask_all; bool no_dd_clear, no_dd_check; int conditional_mod; /**< BRW_CONDITIONAL_* */ int sampler; uint32_t texture_offset; /**< Texture Offset bitfield */ int target; /**< MRT target. */ bool shadow_compare; bool eot; bool header_present; int mlen; /**< SEND message length */ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ uint32_t offset; /* spill/unspill offset */ /** @{ * Annotation for the generated IR. One of the two can be set. */ const void *ir; const char *annotation; bool is_send_from_grf(); bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask); void reswizzle_dst(int dst_writemask, int swizzle); }; /** * The vertex shader front-end. * * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. */ class vec4_visitor : public backend_visitor { public: vec4_visitor(struct brw_context *brw, struct brw_vec4_compile *c, struct gl_program *prog, const struct brw_vec4_prog_key *key, struct brw_vec4_prog_data *prog_data, struct gl_shader_program *shader_prog, struct brw_shader *shader, void *mem_ctx, bool debug_flag); ~vec4_visitor(); dst_reg dst_null_f() { return dst_reg(brw_null_reg()); } dst_reg dst_null_d() { return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } struct gl_program *prog; struct brw_vec4_compile *c; const struct brw_vec4_prog_key *key; struct brw_vec4_prog_data *prog_data; unsigned int sanity_param_count; char *fail_msg; bool failed; /** * GLSL IR currently being processed, which is associated with our * driver IR instructions for debugging purposes. */ const void *base_ir; const char *current_annotation; int *virtual_grf_sizes; int virtual_grf_count; int virtual_grf_array_size; int first_non_payload_grf; unsigned int max_grf; int *virtual_grf_start; int *virtual_grf_end; dst_reg userplane[MAX_CLIP_PLANES]; /** * This is the size to be used for an array with an element per * reg_offset */ int virtual_grf_reg_count; /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */ int *virtual_grf_reg_map; bool live_intervals_valid; dst_reg *variable_storage(ir_variable *var); void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); bool need_all_constants_in_pull_buffer; /** * \name Visit methods * * As typical for the visitor pattern, there must be one \c visit method for * each concrete subclass of \c ir_instruction. Virtual base classes within * the hierarchy should not have \c visit methods. */ /*@{*/ virtual void visit(ir_variable *); virtual void visit(ir_loop *); virtual void visit(ir_loop_jump *); virtual void visit(ir_function_signature *); virtual void visit(ir_function *); virtual void visit(ir_expression *); virtual void visit(ir_swizzle *); virtual void visit(ir_dereference_variable *); virtual void visit(ir_dereference_array *); virtual void visit(ir_dereference_record *); virtual void visit(ir_assignment *); virtual void visit(ir_constant *); virtual void visit(ir_call *); virtual void visit(ir_return *); virtual void visit(ir_discard *); virtual void visit(ir_texture *); virtual void visit(ir_if *); /*@}*/ src_reg result; /* Regs for vertex results. Generated at ir_variable visiting time * for the ir->location's used. */ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; int uniform_size[MAX_UNIFORMS]; int uniform_vector_size[MAX_UNIFORMS]; int uniforms; src_reg shader_start_time; struct hash_table *variable_ht; bool run(void); void fail(const char *msg, ...); int virtual_grf_alloc(int size); void setup_uniform_clipplane_values(); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); void setup_payload(); bool reg_allocate_trivial(); bool reg_allocate(); void evaluate_spill_costs(float *spill_costs, bool *no_spill); int choose_spill_reg(struct ra_graph *g); void spill_reg(int spill_reg); void move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); void split_uniform_registers(); void pack_uniform_registers(); void calculate_live_intervals(); void split_virtual_grfs(); bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); bool opt_copy_propagation(); bool opt_algebraic(); bool opt_register_coalesce(); void opt_set_dependency_control(); void opt_schedule_instructions(); bool can_do_source_mods(vec4_instruction *inst); vec4_instruction *emit(vec4_instruction *inst); vec4_instruction *emit(enum opcode opcode); vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1, src_reg src2); vec4_instruction *emit_before(vec4_instruction *inst, vec4_instruction *new_inst); vec4_instruction *MOV(dst_reg dst, src_reg src0); vec4_instruction *NOT(dst_reg dst, src_reg src0); vec4_instruction *RNDD(dst_reg dst, src_reg src0); vec4_instruction *RNDE(dst_reg dst, src_reg src0); vec4_instruction *RNDZ(dst_reg dst, src_reg src0); vec4_instruction *FRC(dst_reg dst, src_reg src0); vec4_instruction *F32TO16(dst_reg dst, src_reg src0); vec4_instruction *F16TO32(dst_reg dst, src_reg src0); vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition); vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition); vec4_instruction *IF(uint32_t predicate); vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x); vec4_instruction *BFREV(dst_reg dst, src_reg value); vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value); vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset); vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base); vec4_instruction *FBH(dst_reg dst, src_reg value); vec4_instruction *FBL(dst_reg dst, src_reg value); vec4_instruction *CBIT(dst_reg dst, src_reg value); int implied_mrf_writes(vec4_instruction *inst); bool try_rewrite_rhs_to_dst(ir_assignment *ir, dst_reg dst, src_reg src, vec4_instruction *pre_rhs_inst, vec4_instruction *last_rhs_inst); bool try_copy_propagation(vec4_instruction *inst, int arg, src_reg *values[4]); /** Walks an exec_list of ir_instruction and sends it through this visitor. */ void visit_instructions(const exec_list *list); void emit_vp_sop(uint32_t condmod, dst_reg dst, src_reg src0, src_reg src1, src_reg one); void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate); void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); void emit_if_gen6(ir_if *ir); void emit_minmax(uint32_t condmod, dst_reg dst, src_reg src0, src_reg src1); void emit_block_move(dst_reg *dst, src_reg *src, const struct glsl_type *type, uint32_t predicate); void emit_constant_values(dst_reg *dst, ir_constant *value); /** * Emit the correct dot-product instruction for the type of arguments */ void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0, src_reg src1); void emit_scs(ir_instruction *ir, enum prog_opcode op, dst_reg dst, const src_reg &src); src_reg fix_3src_operand(src_reg src); void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); void emit_math(enum opcode opcode, dst_reg dst, src_reg src); void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); src_reg fix_math_operand(src_reg src); void emit_pack_half_2x16(dst_reg dst, src_reg src0); void emit_unpack_half_2x16(dst_reg dst, src_reg src0); void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler); void emit_ndc_computation(); void emit_psiz_and_flags(struct brw_reg reg); void emit_clip_distances(struct brw_reg reg, int offset); void emit_generic_urb_slot(dst_reg reg, int varying); void emit_urb_slot(int mrf, int varying); void emit_shader_time_begin(); void emit_shader_time_end(); void emit_shader_time_write(enum shader_time_shader_type type, src_reg value); src_reg get_scratch_offset(vec4_instruction *inst, src_reg *reladdr, int reg_offset); src_reg get_pull_constant_offset(vec4_instruction *inst, src_reg *reladdr, int reg_offset); void emit_scratch_read(vec4_instruction *inst, dst_reg dst, src_reg orig_src, int base_offset); void emit_scratch_write(vec4_instruction *inst, int base_offset); void emit_pull_constant_load(vec4_instruction *inst, dst_reg dst, src_reg orig_src, int base_offset); bool try_emit_sat(ir_expression *ir); bool try_emit_mad(ir_expression *ir, int mul_arg); void resolve_ud_negate(src_reg *reg); src_reg get_timestamp(); bool process_move_condition(ir_rvalue *ir); void dump_instruction(backend_instruction *inst); protected: void emit_vertex(); void lower_attributes_to_hw_regs(const int *attribute_map); virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0; virtual int setup_attributes(int payload_reg) = 0; virtual void emit_prolog() = 0; virtual void emit_program_code() = 0; virtual void emit_thread_end() = 0; virtual void emit_urb_write_header(int mrf) = 0; virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; virtual int compute_array_stride(ir_dereference_array *ir); const bool debug_flag; }; class vec4_vs_visitor : public vec4_visitor { public: vec4_vs_visitor(struct brw_context *brw, struct brw_vs_compile *vs_compile, struct brw_vs_prog_data *vs_prog_data, struct gl_shader_program *prog, struct brw_shader *shader, void *mem_ctx); protected: virtual dst_reg *make_reg_for_system_value(ir_variable *ir); virtual int setup_attributes(int payload_reg); virtual void emit_prolog(); virtual void emit_program_code(); virtual void emit_thread_end(); virtual void emit_urb_write_header(int mrf); virtual vec4_instruction *emit_urb_write_opcode(bool complete); private: void setup_vp_regs(); dst_reg get_vp_dst_reg(const prog_dst_register &dst); src_reg get_vp_src_reg(const prog_src_register &src); struct brw_vs_compile * const vs_compile; struct brw_vs_prog_data * const vs_prog_data; src_reg *vp_temp_regs; src_reg vp_addr_reg; }; /** * The vertex shader code generator. * * Translates VS IR to actual i965 assembly code. */ class vec4_generator { public: vec4_generator(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_program *prog, void *mem_ctx, bool debug_flag); ~vec4_generator(); const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size); private: void generate_code(exec_list *instructions); void generate_vec4_instruction(vec4_instruction *inst, struct brw_reg dst, struct brw_reg *src); void generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); void generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); void generate_math2_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1); void generate_math2_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1); void generate_math2_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1); void generate_tex(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); void generate_urb_write(vec4_instruction *inst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg index); void generate_scratch_read(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index); void generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset); void generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset); struct brw_context *brw; struct intel_context *intel; struct gl_context *ctx; struct brw_compile *p; struct gl_shader_program *shader_prog; struct gl_shader *shader; const struct gl_program *prog; void *mem_ctx; const bool debug_flag; }; } /* namespace brw */ #endif /* BRW_VEC4_H */