diff options
Diffstat (limited to 'src/broadcom/compiler/v3d_compiler.h')
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 927 |
1 files changed, 927 insertions, 0 deletions
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h new file mode 100644 index 00000000000..e0eeefe245a --- /dev/null +++ b/src/broadcom/compiler/v3d_compiler.h @@ -0,0 +1,927 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
 */

#ifndef V3D_COMPILER_H
#define V3D_COMPILER_H

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "common/v3d_debug.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"

#include "qpu/qpu_instr.h"
#include "pipe/p_state.h"

/* Compile-time limits; they size the fixed-length arrays declared in this
 * header (per-sampler key state, per-sample color registers, FS input slots
 * and VS attribute sizes).
 */
#define V3D_MAX_TEXTURE_SAMPLERS 32
#define V3D_MAX_SAMPLES 4
#define V3D_MAX_FS_INPUTS 64
#define V3D_MAX_VS_INPUTS 64

struct nir_builder;

struct v3d_fs_inputs {
        /**
         * Array of the meanings of the VPM inputs this shader needs.
         *
         * It doesn't include those that aren't part of the VPM, like
         * point/line coordinates.
         */
        struct v3d_varying_slot *input_slots;
        /* NOTE(review): presumably the number of entries in input_slots --
         * confirm against the users of this struct.
         */
        uint32_t num_inputs;
};

/** Register file that a struct qreg's index refers into. */
enum qfile {
        /** An unused source or destination register. */
        QFILE_NULL,

        /** A physical register, such as the W coordinate payload. */
        QFILE_REG,
        /** One of the registers for fixed function interactions. */
        QFILE_MAGIC,

        /**
         * A virtual register, that will be allocated to actual accumulator
         * or physical registers later.
         */
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_TLB,
        QFILE_TLBU,

        /**
         * VPM reads use this with an index value to say what part of the VPM
         * is being read.
         */
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
         */
        QFILE_LOAD_IMM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};

/**
 * A reference to a QPU register or a virtual temp register.
 */
struct qreg {
        /** Which register file index is interpreted in. */
        enum qfile file;
        /** Register number, or an immediate value for the *_IMM files. */
        uint32_t index;
};

/**
 * Builds a struct qreg by value from a register file and an index.
 */
static inline struct qreg vir_reg(enum qfile file, uint32_t index)
{
        return (struct qreg){file, index};
}

/**
 * A reference to an actual register at the QPU level, for register
 * allocation.
+ */ +struct qpu_reg { + bool magic; + int index; +}; + +struct qinst { + /** Entry in qblock->instructions */ + struct list_head link; + + /** + * The instruction being wrapped. Its condition codes, pack flags, + * signals, etc. will all be used, with just the register references + * being replaced by the contents of qinst->dst and qinst->src[]. + */ + struct v3d_qpu_instr qpu; + + /* Pre-register-allocation references to src/dst registers */ + struct qreg dst; + struct qreg src[3]; + bool cond_is_exec_mask; + bool has_implicit_uniform; + + /* After vir_to_qpu.c: If instr reads a uniform, which uniform from + * the uncompiled stream it is. + */ + int uniform; +}; + +enum quniform_contents { + /** + * Indicates that a constant 32-bit value is copied from the program's + * uniform contents. + */ + QUNIFORM_CONSTANT, + /** + * Indicates that the program's uniform contents are used as an index + * into the GL uniform storage. + */ + QUNIFORM_UNIFORM, + + /** @{ + * Scaling factors from clip coordinates to relative to the viewport + * center. + * + * This is used by the coordinate and vertex shaders to produce the + * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed + * point offsets from the viewport ccenter. + */ + QUNIFORM_VIEWPORT_X_SCALE, + QUNIFORM_VIEWPORT_Y_SCALE, + /** @} */ + + QUNIFORM_VIEWPORT_Z_OFFSET, + QUNIFORM_VIEWPORT_Z_SCALE, + + QUNIFORM_USER_CLIP_PLANE, + + /** + * A reference to a texture config parameter 0 uniform. + * + * This is a uniform implicitly loaded with a QPU_W_TMU* write, which + * defines texture type, miplevels, and such. It will be found as a + * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 
+ */ + QUNIFORM_TEXTURE_CONFIG_P0_0, + QUNIFORM_TEXTURE_CONFIG_P0_1, + QUNIFORM_TEXTURE_CONFIG_P0_2, + QUNIFORM_TEXTURE_CONFIG_P0_3, + QUNIFORM_TEXTURE_CONFIG_P0_4, + QUNIFORM_TEXTURE_CONFIG_P0_5, + QUNIFORM_TEXTURE_CONFIG_P0_6, + QUNIFORM_TEXTURE_CONFIG_P0_7, + QUNIFORM_TEXTURE_CONFIG_P0_8, + QUNIFORM_TEXTURE_CONFIG_P0_9, + QUNIFORM_TEXTURE_CONFIG_P0_10, + QUNIFORM_TEXTURE_CONFIG_P0_11, + QUNIFORM_TEXTURE_CONFIG_P0_12, + QUNIFORM_TEXTURE_CONFIG_P0_13, + QUNIFORM_TEXTURE_CONFIG_P0_14, + QUNIFORM_TEXTURE_CONFIG_P0_15, + QUNIFORM_TEXTURE_CONFIG_P0_16, + QUNIFORM_TEXTURE_CONFIG_P0_17, + QUNIFORM_TEXTURE_CONFIG_P0_18, + QUNIFORM_TEXTURE_CONFIG_P0_19, + QUNIFORM_TEXTURE_CONFIG_P0_20, + QUNIFORM_TEXTURE_CONFIG_P0_21, + QUNIFORM_TEXTURE_CONFIG_P0_22, + QUNIFORM_TEXTURE_CONFIG_P0_23, + QUNIFORM_TEXTURE_CONFIG_P0_24, + QUNIFORM_TEXTURE_CONFIG_P0_25, + QUNIFORM_TEXTURE_CONFIG_P0_26, + QUNIFORM_TEXTURE_CONFIG_P0_27, + QUNIFORM_TEXTURE_CONFIG_P0_28, + QUNIFORM_TEXTURE_CONFIG_P0_29, + QUNIFORM_TEXTURE_CONFIG_P0_30, + QUNIFORM_TEXTURE_CONFIG_P0_31, + QUNIFORM_TEXTURE_CONFIG_P0_32, + + /** + * A reference to a texture config parameter 1 uniform. + * + * This is a uniform implicitly loaded with a QPU_W_TMU* write, which + * defines texture width, height, filters, and wrap modes. It will be + * found as a parameter to the second QOP_TEX_[STRB] instruction in a + * sequence. 
+ */ + QUNIFORM_TEXTURE_CONFIG_P1, + + QUNIFORM_TEXTURE_FIRST_LEVEL, + + QUNIFORM_TEXTURE_WIDTH, + QUNIFORM_TEXTURE_HEIGHT, + QUNIFORM_TEXTURE_DEPTH, + QUNIFORM_TEXTURE_ARRAY_SIZE, + QUNIFORM_TEXTURE_LEVELS, + + QUNIFORM_TEXTURE_MSAA_ADDR, + + QUNIFORM_UBO_ADDR, + + QUNIFORM_TEXRECT_SCALE_X, + QUNIFORM_TEXRECT_SCALE_Y, + + QUNIFORM_TEXTURE_BORDER_COLOR, + + QUNIFORM_STENCIL, + + QUNIFORM_ALPHA_REF, + QUNIFORM_SAMPLE_MASK, +}; + +struct v3d_varying_slot { + uint8_t slot_and_component; +}; + +static inline struct v3d_varying_slot +v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component) +{ + assert(slot < 255 / 4); + return (struct v3d_varying_slot){ (slot << 2) + component }; +} + +static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot) +{ + return slot.slot_and_component >> 2; +} + +static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot) +{ + return slot.slot_and_component & 3; +} + +struct v3d_ubo_range { + /** + * offset in bytes from the start of the ubo where this range is + * uploaded. + * + * Only set once used is set. + */ + uint32_t dst_offset; + + /** + * offset in bytes from the start of the gallium uniforms where the + * data comes from. 
+ */ + uint32_t src_offset; + + /** size in bytes of this ubo range */ + uint32_t size; +}; + +struct v3d_key { + void *shader_state; + struct { + uint8_t swizzle[4]; + uint8_t return_size; + uint8_t return_channels; + union { + struct { + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned wrap_s:3; + unsigned wrap_t:3; + }; + struct { + uint16_t msaa_width, msaa_height; + }; + }; + } tex[V3D_MAX_TEXTURE_SAMPLERS]; + uint8_t ucp_enables; +}; + +struct v3d_fs_key { + struct v3d_key base; + bool depth_enabled; + bool is_points; + bool is_lines; + bool alpha_test; + bool point_coord_upper_left; + bool light_twoside; + bool msaa; + bool sample_coverage; + bool sample_alpha_to_coverage; + bool sample_alpha_to_one; + bool clamp_color; + bool swap_color_rb; + uint8_t alpha_test_func; + uint8_t logicop_func; + uint32_t point_sprite_mask; + + struct pipe_rt_blend_state blend; +}; + +struct v3d_vs_key { + struct v3d_key base; + + struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS]; + uint8_t num_fs_inputs; + + bool is_coord; + bool per_vertex_point_size; + bool clamp_color; +}; + +/** A basic block of VIR intructions. */ +struct qblock { + struct list_head link; + + struct list_head instructions; + + struct set *predecessors; + struct qblock *successors[2]; + + int index; + + /* Instruction IPs for the first and last instruction of the block. + * Set by qpu_schedule.c. + */ + uint32_t start_qpu_ip; + uint32_t end_qpu_ip; + + /* Instruction IP for the branch instruction of the block. Set by + * qpu_schedule.c. + */ + uint32_t branch_qpu_ip; + + /** Offset within the uniform stream at the start of the block. 
*/ + uint32_t start_uniform; + /** Offset within the uniform stream of the branch instruction */ + uint32_t branch_uniform; + + /** @{ used by v3d_vir_live_variables.c */ + BITSET_WORD *def; + BITSET_WORD *use; + BITSET_WORD *live_in; + BITSET_WORD *live_out; + int start_ip, end_ip; + /** @} */ +}; + +/** + * Compiler state saved across compiler invocations, for any expensive global + * setup. + */ +struct v3d_compiler { + const struct v3d_device_info *devinfo; + struct ra_regs *regs; + unsigned int reg_class[3]; +}; + +struct v3d_compile { + const struct v3d_device_info *devinfo; + nir_shader *s; + nir_function_impl *impl; + struct exec_list *cf_node_list; + const struct v3d_compiler *compiler; + + /** + * Mapping from nir_register * or nir_ssa_def * to array of struct + * qreg for the values. + */ + struct hash_table *def_ht; + + /* For each temp, the instruction generating its value. */ + struct qinst **defs; + uint32_t defs_array_size; + + /** + * Inputs to the shader, arranged by TGSI declaration order. + * + * Not all fragment shader QFILE_VARY reads are present in this array. + */ + struct qreg *inputs; + struct qreg *outputs; + bool msaa_per_sample_output; + struct qreg color_reads[V3D_MAX_SAMPLES]; + struct qreg sample_colors[V3D_MAX_SAMPLES]; + uint32_t inputs_array_size; + uint32_t outputs_array_size; + uint32_t uniforms_array_size; + + /* Booleans for whether the corresponding QFILE_VARY[i] is + * flat-shaded. This doesn't count gl_FragColor flat-shading, which is + * controlled by shader->color_inputs and rasterizer->flatshade in the + * gallium driver. + */ + BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + struct v3d_ubo_range *ubo_ranges; + bool *ubo_range_used; + uint32_t ubo_ranges_array_size; + /** Number of uniform areas tracked in ubo_ranges. */ + uint32_t num_ubo_ranges; + uint32_t next_ubo_dst_offset; + + /* State for whether we're executing on each channel currently. 
0 if + * yes, otherwise a block number + 1 that the channel jumped to. + */ + struct qreg execute; + + struct qreg line_x, point_x, point_y; + + /** + * Instance ID, which comes in before the vertex attribute payload if + * the shader record requests it. + */ + struct qreg iid; + + /** + * Vertex ID, which comes in before the vertex attribute payload + * (after Instance ID) if the shader record requests it. + */ + struct qreg vid; + + /* Fragment shader payload regs. */ + struct qreg payload_w, payload_w_centroid, payload_z; + + /** boolean (~0 -> true) if the fragment has been discarded. */ + struct qreg discard; + + uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; + uint32_t num_vpm_writes; + + /** + * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. + * + * This includes those that aren't part of the VPM varyings, like + * point/line coordinates. + */ + struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; + + /** + * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* + * of the output is. Used to emit from the VS in the order that the + * FS needs. + */ + struct v3d_varying_slot *output_slots; + + struct pipe_shader_state *shader_state; + struct v3d_key *key; + struct v3d_fs_key *fs_key; + struct v3d_vs_key *vs_key; + + /* Live ranges of temps. 
*/ + int *temp_start, *temp_end; + + uint32_t *uniform_data; + enum quniform_contents *uniform_contents; + uint32_t uniform_array_size; + uint32_t num_uniforms; + uint32_t num_outputs; + uint32_t output_position_index; + nir_variable *output_color_var; + uint32_t output_point_size_index; + uint32_t output_sample_mask_index; + + struct qreg undef; + uint32_t num_temps; + + struct list_head blocks; + int next_block_index; + struct qblock *cur_block; + struct qblock *loop_cont_block; + struct qblock *loop_break_block; + + uint64_t *qpu_insts; + uint32_t qpu_inst_count; + uint32_t qpu_inst_size; + + /* For the FS, the number of varying inputs not counting the + * point/line varyings payload + */ + uint32_t num_inputs; + + /** + * Number of inputs from num_inputs remaining to be queued to the read + * FIFO in the VS/CS. + */ + uint32_t num_inputs_remaining; + + /* Number of inputs currently in the read FIFO for the VS/CS */ + uint32_t num_inputs_in_fifo; + + /** Next offset in the VPM to read from in the VS/CS */ + uint32_t vpm_read_offset; + + uint32_t program_id; + uint32_t variant_id; + + /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH + * is used to hide texturing latency at the cost of limiting ourselves + * to the bottom half of physical reg space. + */ + bool fs_threaded; + + bool last_thrsw_at_top_level; + + bool failed; +}; + +struct v3d_uniform_list { + enum quniform_contents *contents; + uint32_t *data; + uint32_t count; +}; + +struct v3d_prog_data { + struct v3d_uniform_list uniforms; + + struct v3d_ubo_range *ubo_ranges; + uint32_t num_ubo_ranges; + uint32_t ubo_size; + + uint8_t num_inputs; + +}; + +struct v3d_vs_prog_data { + struct v3d_prog_data base; + + bool uses_iid, uses_vid; + + /* Number of components read from each vertex attribute. */ + uint8_t vattr_sizes[32]; + + /* Total number of components read, for the shader state record. 
*/ + uint32_t vpm_input_size; + + /* Total number of components written, for the shader state record. */ + uint32_t vpm_output_size; +}; + +struct v3d_fs_prog_data { + struct v3d_prog_data base; + + struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; + + /** bitmask of which inputs are color inputs, for flat shade handling. */ + uint32_t color_inputs[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + /* Bitmask for whether the corresponding input is flat-shaded, + * independent of rasterizer (gl_FragColor) flat-shading. + */ + BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + + bool writes_z; +}; + +/* Special nir_load_input intrinsic index for loading the current TLB + * destination color. + */ +#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000 + +#define V3D_NIR_MS_MASK_OUTPUT 2000000000 + +extern const nir_shader_compiler_options v3d_nir_options; + +const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); +void v3d_compiler_free(const struct v3d_compiler *compiler); +void v3d_optimize_nir(struct nir_shader *s); + +uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, + struct v3d_vs_key *key, + struct v3d_vs_prog_data *prog_data, + nir_shader *s, + int program_id, int variant_id, + uint32_t *final_assembly_size); + +uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler, + struct v3d_fs_key *key, + struct v3d_fs_prog_data *prog_data, + nir_shader *s, + int program_id, int variant_id, + uint32_t *final_assembly_size); + +void v3d_nir_to_vir(struct v3d_compile *c); + +void vir_compile_destroy(struct v3d_compile *c); +const char *vir_get_stage_name(struct v3d_compile *c); +struct qblock *vir_new_block(struct v3d_compile *c); +void vir_set_emit_block(struct v3d_compile *c, struct qblock *block); +void vir_link_blocks(struct qblock *predecessor, struct qblock *successor); +struct qblock *vir_entry_block(struct v3d_compile *c); +struct qblock *vir_exit_block(struct v3d_compile *c); +struct qinst *vir_add_inst(enum 
v3d_qpu_add_op op, struct qreg dst, + struct qreg src0, struct qreg src1); +struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, + struct qreg src0, struct qreg src1); +struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0); +void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst); +struct qreg vir_uniform(struct v3d_compile *c, + enum quniform_contents contents, + uint32_t data); +void vir_schedule_instructions(struct v3d_compile *c); +struct v3d_qpu_instr v3d_qpu_nop(void); + +struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); +struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); +void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); +void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); +void vir_set_unpack(struct qinst *inst, int src, + enum v3d_qpu_input_unpack unpack); + +struct qreg vir_get_temp(struct v3d_compile *c); +void vir_calculate_live_intervals(struct v3d_compile *c); +bool vir_has_implicit_uniform(struct qinst *inst); +int vir_get_implicit_uniform_src(struct qinst *inst); +int vir_get_non_sideband_nsrc(struct qinst *inst); +int vir_get_nsrc(struct qinst *inst); +bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); +bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op); +bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op); +bool vir_is_raw_mov(struct qinst *inst); +bool vir_is_tex(struct qinst *inst); +bool vir_is_add(struct qinst *inst); +bool vir_is_mul(struct qinst *inst); +bool vir_is_float_input(struct qinst *inst); +bool vir_depends_on_flags(struct qinst *inst); +bool vir_writes_r3(struct qinst *inst); +bool vir_writes_r4(struct qinst *inst); +struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); +uint8_t vir_channels_written(struct qinst *inst); + +void vir_dump(struct v3d_compile *c); +void vir_dump_inst(struct v3d_compile *c, struct qinst *inst); + +void vir_validate(struct 
v3d_compile *c); + +void vir_optimize(struct v3d_compile *c); +bool vir_opt_algebraic(struct v3d_compile *c); +bool vir_opt_constant_folding(struct v3d_compile *c); +bool vir_opt_copy_propagate(struct v3d_compile *c); +bool vir_opt_dead_code(struct v3d_compile *c); +bool vir_opt_peephole_sf(struct v3d_compile *c); +bool vir_opt_small_immediates(struct v3d_compile *c); +bool vir_opt_vpm(struct v3d_compile *c); +void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); +void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); +void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); +void vir_lower_uniforms(struct v3d_compile *c); + +void v3d_vir_to_qpu(struct v3d_compile *c); +uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); +void qpu_validate(struct v3d_compile *c); +struct qpu_reg *v3d_register_allocate(struct v3d_compile *c); +bool vir_init_reg_sets(struct v3d_compiler *compiler); + +void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf); + +static inline bool +quniform_contents_is_texture_p0(enum quniform_contents contents) +{ + return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 && + contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 + + V3D_MAX_TEXTURE_SAMPLERS)); +} + +static inline struct qreg +vir_uniform_ui(struct v3d_compile *c, uint32_t ui) +{ + return vir_uniform(c, QUNIFORM_CONSTANT, ui); +} + +static inline struct qreg +vir_uniform_f(struct v3d_compile *c, float f) +{ + return vir_uniform(c, QUNIFORM_CONSTANT, fui(f)); +} + +#define VIR_ALU0(name, vir_inst, op) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c) \ +{ \ + return vir_emit_def(c, vir_inst(op, c->undef, \ + c->undef, c->undef)); \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest) \ +{ \ + return vir_emit_nondef(c, vir_inst(op, dest, \ + c->undef, c->undef)); \ +} + +#define VIR_ALU1(name, vir_inst, op) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c, struct qreg a) \ +{ \ + 
return vir_emit_def(c, vir_inst(op, c->undef, \ + a, c->undef)); \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ + struct qreg a) \ +{ \ + return vir_emit_nondef(c, vir_inst(op, dest, a, \ + c->undef)); \ +} + +#define VIR_ALU2(name, vir_inst, op) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ +{ \ + return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ + struct qreg a, struct qreg b) \ +{ \ + return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \ +} + +#define VIR_NODST_1(name, vir_inst, op) \ +static inline struct qinst * \ +vir_##name(struct v3d_compile *c, struct qreg a) \ +{ \ + return vir_emit_nondef(c, vir_inst(op, c->undef, \ + a, c->undef)); \ +} + +#define VIR_NODST_2(name, vir_inst, op) \ +static inline struct qinst * \ +vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ +{ \ + return vir_emit_nondef(c, vir_inst(op, c->undef, \ + a, b)); \ +} + +#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) +#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) +#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) +#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name) +#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name) +#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name) +#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name) +#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name) +#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name) +#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name) + +VIR_A_ALU2(FADD) +VIR_A_ALU2(VFPACK) +VIR_A_ALU2(FSUB) +VIR_A_ALU2(FMIN) +VIR_A_ALU2(FMAX) + +VIR_A_ALU2(ADD) +VIR_A_ALU2(SUB) +VIR_A_ALU2(SHL) 
+VIR_A_ALU2(SHR) +VIR_A_ALU2(ASR) +VIR_A_ALU2(ROR) +VIR_A_ALU2(MIN) +VIR_A_ALU2(MAX) +VIR_A_ALU2(UMIN) +VIR_A_ALU2(UMAX) +VIR_A_ALU2(AND) +VIR_A_ALU2(OR) +VIR_A_ALU2(XOR) +VIR_A_ALU2(VADD) +VIR_A_ALU2(VSUB) +VIR_A_ALU1(NOT) +VIR_A_ALU1(NEG) +VIR_A_ALU1(FLAPUSH) +VIR_A_ALU1(FLBPUSH) +VIR_A_ALU1(FLBPOP) +VIR_A_ALU1(SETMSF) +VIR_A_ALU1(SETREVF) +VIR_A_ALU1(TIDX) +VIR_A_ALU1(EIDX) + +VIR_A_ALU0(FXCD) +VIR_A_ALU0(XCD) +VIR_A_ALU0(FYCD) +VIR_A_ALU0(YCD) +VIR_A_ALU0(MSF) +VIR_A_ALU0(REVF) +VIR_A_NODST_1(VPMSETUP) +VIR_A_ALU2(FCMP) +VIR_A_ALU2(VFMAX) + +VIR_A_ALU1(FROUND) +VIR_A_ALU1(FTOIN) +VIR_A_ALU1(FTRUNC) +VIR_A_ALU1(FTOIZ) +VIR_A_ALU1(FFLOOR) +VIR_A_ALU1(FTOUZ) +VIR_A_ALU1(FCEIL) +VIR_A_ALU1(FTOC) + +VIR_A_ALU1(FDX) +VIR_A_ALU1(FDY) + +VIR_A_ALU1(ITOF) +VIR_A_ALU1(CLZ) +VIR_A_ALU1(UTOF) + +VIR_M_ALU2(UMUL24) +VIR_M_ALU2(FMUL) +VIR_M_ALU2(SMUL24) +VIR_M_NODST_2(MULTOP) + +VIR_M_ALU1(MOV) +VIR_M_ALU1(FMOV) + +static inline struct qinst * +vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, + struct qreg dest, struct qreg src) +{ + struct qinst *mov = vir_MOV_dest(c, dest, src); + vir_set_cond(mov, cond); + return mov; +} + +static inline struct qreg +vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond, + struct qreg src0, struct qreg src1) +{ + struct qreg t = vir_get_temp(c); + vir_MOV_dest(c, t, src1); + vir_MOV_cond(c, cond, t, src0); + return t; +} + +static inline void +vir_VPM_WRITE(struct v3d_compile *c, struct qreg val) +{ + vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); +} + +static inline struct qinst * +vir_NOP(struct v3d_compile *c) +{ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP, + c->undef, c->undef, c->undef)); +} +/* +static inline struct qreg +vir_LOAD_IMM(struct v3d_compile *c, uint32_t val) +{ + return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef, + vir_reg(QFILE_LOAD_IMM, val), c->undef)); +} + +static inline struct qreg +vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val) +{ + return vir_emit_def(c, 
vir_inst(QOP_LOAD_IMM_U2, c->undef, + vir_reg(QFILE_LOAD_IMM, val), + c->undef)); +} +static inline struct qreg +vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val) +{ + return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef, + vir_reg(QFILE_LOAD_IMM, val), + c->undef)); +} +*/ + +static inline struct qinst * +vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond) +{ + /* The actual uniform_data value will be set at scheduling time */ + return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0))); +} + +#define vir_for_each_block(block, c) \ + list_for_each_entry(struct qblock, block, &c->blocks, link) + +#define vir_for_each_block_rev(block, c) \ + list_for_each_entry_rev(struct qblock, block, &c->blocks, link) + +/* Loop over the non-NULL members of the successors array. */ +#define vir_for_each_successor(succ, block) \ + for (struct qblock *succ = block->successors[0]; \ + succ != NULL; \ + succ = (succ == block->successors[1] ? NULL : \ + block->successors[1])) + +#define vir_for_each_inst(inst, block) \ + list_for_each_entry(struct qinst, inst, &block->instructions, link) + +#define vir_for_each_inst_rev(inst, block) \ + list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) + +#define vir_for_each_inst_safe(inst, block) \ + list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) + +#define vir_for_each_inst_inorder(inst, c) \ + vir_for_each_block(_block, c) \ + vir_for_each_inst(inst, _block) + +#endif /* V3D_COMPILER_H */ |