diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 609 |
2 files changed, 296 insertions, 316 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f8313c1cfce..2abd4cece03 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -627,9 +627,6 @@ public: private: void generate_code(const cfg_t *cfg); - void generate_vec4_instruction(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg *src); void generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index a5c2d1a2f8c..c727f1a5c5f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -977,368 +977,351 @@ vec4_generator::generate_untyped_surface_read(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); } -/** - * Generate assembly for a Vec4 IR instruction. - * - * \param instruction The Vec4 IR instruction to generate code for. - * \param dst The destination register. - * \param src An array of up to three source registers. - */ void -vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, - struct brw_reg dst, - struct brw_reg *src) +vec4_generator::generate_code(const cfg_t *cfg) { - vec4_instruction *inst = (vec4_instruction *) instruction; - - if (dst.width == BRW_WIDTH_4) { - /* This happens in attribute fixups for "dual instanced" geometry - * shaders, since they use attributes that are vec4's. Since the exec - * width is only 4, it's essential that the caller set - * force_writemask_all in order to make sure the instruction is executed - * regardless of which channels are enabled. - */ - assert(inst->force_writemask_all); - - /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy - * the following register region restrictions (from Graphics BSpec: - * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions - * > Register Region Restrictions) - * - * 1. ExecSize must be greater than or equal to Width. - * - * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set - * to Width * HorzStride." - */ - for (int i = 0; i < 3; i++) { - if (src[i].file == BRW_GENERAL_REGISTER_FILE) - src[i] = stride(src[i], 4, 4, 1); - } - } - - switch (inst->opcode) { - case BRW_OPCODE_MOV: - brw_MOV(p, dst, src[0]); - break; - case BRW_OPCODE_ADD: - brw_ADD(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_MUL: - brw_MUL(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_MACH: - brw_MACH(p, dst, src[0], src[1]); - break; + struct annotation_info annotation; + memset(&annotation, 0, sizeof(annotation)); - case BRW_OPCODE_MAD: - assert(brw->gen >= 6); - brw_MAD(p, dst, src[0], src[1], src[2]); - break; + foreach_block_and_inst (block, vec4_instruction, inst, cfg) { + struct brw_reg src[3], dst; - case BRW_OPCODE_FRC: - brw_FRC(p, dst, src[0]); - break; - case BRW_OPCODE_RNDD: - brw_RNDD(p, dst, src[0]); - break; - case BRW_OPCODE_RNDE: - brw_RNDE(p, dst, src[0]); - break; - case BRW_OPCODE_RNDZ: - brw_RNDZ(p, dst, src[0]); - break; + if (unlikely(debug_flag)) + annotate(brw, &annotation, cfg, inst, p->next_insn_offset); - case BRW_OPCODE_AND: - brw_AND(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_OR: - brw_OR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_XOR: - brw_XOR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_NOT: - brw_NOT(p, dst, src[0]); - break; - case BRW_OPCODE_ASR: - brw_ASR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_SHR: - brw_SHR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_SHL: - brw_SHL(p, dst, src[0], src[1]); - break; + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(this->prog_data, i); + } + dst = inst->get_dst(); - case BRW_OPCODE_CMP: - brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); - break; - case BRW_OPCODE_SEL: - brw_SEL(p, dst, src[0], src[1]); - break; + brw_set_default_predicate_control(p, inst->predicate); + brw_set_default_predicate_inverse(p, inst->predicate_inverse); + brw_set_default_saturate(p, inst->saturate); + brw_set_default_mask_control(p, inst->force_writemask_all); + brw_set_default_acc_write_control(p, inst->writes_accumulator); - case BRW_OPCODE_DPH: - brw_DPH(p, dst, src[0], src[1]); - break; + unsigned pre_emit_nr_insn = p->nr_insn; - case BRW_OPCODE_DP4: - brw_DP4(p, dst, src[0], src[1]); - break; + if (dst.width == BRW_WIDTH_4) { + /* This happens in attribute fixups for "dual instanced" geometry + * shaders, since they use attributes that are vec4's. Since the exec + * width is only 4, it's essential that the caller set + * force_writemask_all in order to make sure the instruction is executed + * regardless of which channels are enabled. + */ + assert(inst->force_writemask_all); + + /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy + * the following register region restrictions (from Graphics BSpec: + * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions + * > Register Region Restrictions) + * + * 1. ExecSize must be greater than or equal to Width. + * + * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set + * to Width * HorzStride." + */ + for (int i = 0; i < 3; i++) { + if (src[i].file == BRW_GENERAL_REGISTER_FILE) + src[i] = stride(src[i], 4, 4, 1); + } + } - case BRW_OPCODE_DP3: - brw_DP3(p, dst, src[0], src[1]); - break; + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MACH: + brw_MACH(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_DP2: - brw_DP2(p, dst, src[0], src[1]); - break; + case BRW_OPCODE_MAD: + assert(brw->gen >= 6); + brw_MAD(p, dst, src[0], src[1], src[2]); + break; - case BRW_OPCODE_F32TO16: - assert(brw->gen >= 7); - brw_F32TO16(p, dst, src[0]); - break; + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; - case BRW_OPCODE_F16TO32: - assert(brw->gen >= 7); - brw_F16TO32(p, dst, src[0]); - break; + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_LRP: - assert(brw->gen >= 6); - brw_LRP(p, dst, src[0], src[1], src[2]); - break; + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_BFREV: - assert(brw->gen >= 7); - /* BFREV only supports UD type for src and dst. */ - brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), - retype(src[0], BRW_REGISTER_TYPE_UD)); - break; - case BRW_OPCODE_FBH: - assert(brw->gen >= 7); - /* FBH only supports UD type for dst. */ - brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - case BRW_OPCODE_FBL: - assert(brw->gen >= 7); - /* FBL only supports UD type for dst. */ - brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - case BRW_OPCODE_CBIT: - assert(brw->gen >= 7); - /* CBIT only supports UD type for dst. */ - brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); - break; - case BRW_OPCODE_ADDC: - assert(brw->gen >= 7); - brw_ADDC(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_SUBB: - assert(brw->gen >= 7); - brw_SUBB(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_MAC: - brw_MAC(p, dst, src[0], src[1]); - break; + case BRW_OPCODE_DPH: + brw_DPH(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_BFE: - assert(brw->gen >= 7); - brw_BFE(p, dst, src[0], src[1], src[2]); - break; + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_BFI1: - assert(brw->gen >= 7); - brw_BFI1(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_BFI2: - assert(brw->gen >= 7); - brw_BFI2(p, dst, src[0], src[1], src[2]); - break; + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_IF: - if (inst->src[0].file != BAD_FILE) { - /* The instruction has an embedded compare (only allowed on gen6) */ - assert(brw->gen == 6); - gen6_IF(p, inst->conditional_mod, src[0], src[1]); - } else { - brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); - brw_inst_set_pred_control(brw, if_inst, inst->predicate); - } - break; + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; - case BRW_OPCODE_ELSE: - brw_ELSE(p); - break; - case BRW_OPCODE_ENDIF: - brw_ENDIF(p); - break; + case BRW_OPCODE_F32TO16: + assert(brw->gen >= 7); + brw_F32TO16(p, dst, src[0]); + break; - case BRW_OPCODE_DO: - brw_DO(p, BRW_EXECUTE_8); - break; + case BRW_OPCODE_F16TO32: + assert(brw->gen >= 7); + brw_F16TO32(p, dst, src[0]); + break; - case BRW_OPCODE_BREAK: - brw_BREAK(p); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - break; - case BRW_OPCODE_CONTINUE: - brw_CONT(p); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - break; + case BRW_OPCODE_LRP: + assert(brw->gen >= 6); + brw_LRP(p, dst, src[0], src[1], src[2]); + break; - case BRW_OPCODE_WHILE: - brw_WHILE(p); - break; + case BRW_OPCODE_BFREV: + assert(brw->gen >= 7); + /* BFREV only supports UD type for src and dst. */ + brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), + retype(src[0], BRW_REGISTER_TYPE_UD)); + break; + case BRW_OPCODE_FBH: + assert(brw->gen >= 7); + /* FBH only supports UD type for dst. */ + brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_FBL: + assert(brw->gen >= 7); + /* FBL only supports UD type for dst. */ + brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_CBIT: + assert(brw->gen >= 7); + /* CBIT only supports UD type for dst. */ + brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_ADDC: + assert(brw->gen >= 7); + brw_ADDC(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SUBB: + assert(brw->gen >= 7); + brw_SUBB(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MAC: + brw_MAC(p, dst, src[0], src[1]); + break; - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - if (brw->gen >= 7) { - gen6_math(p, dst, brw_math_function(inst->opcode), src[0], - brw_null_reg()); - } else if (brw->gen == 6) { - generate_math_gen6(inst, dst, src[0], brw_null_reg()); - } else { - generate_math1_gen4(inst, dst, src[0]); - } - break; + case BRW_OPCODE_BFE: + assert(brw->gen >= 7); + brw_BFE(p, dst, src[0], src[1], src[2]); + break; - case SHADER_OPCODE_POW: - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: - if (brw->gen >= 7) { - gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); - } else if (brw->gen == 6) { - generate_math_gen6(inst, dst, src[0], src[1]); - } else { - generate_math2_gen4(inst, dst, src[0], src[1]); - } - break; + case BRW_OPCODE_BFI1: + assert(brw->gen >= 7); + brw_BFI1(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_BFI2: + assert(brw->gen >= 7); + brw_BFI2(p, dst, src[0], src[1], src[2]); + break; - case SHADER_OPCODE_TEX: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_MCS: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: - generate_tex(inst, dst, src[0], src[1]); - break; + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(brw->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst_set_pred_control(brw, if_inst, inst->predicate); + } + break; - case VS_OPCODE_URB_WRITE: - generate_vs_urb_write(inst); - break; + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + break; - case SHADER_OPCODE_GEN4_SCRATCH_READ: - generate_scratch_read(inst, dst, src[0]); - break; + case BRW_OPCODE_DO: + brw_DO(p, BRW_EXECUTE_8); + break; - case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - generate_scratch_write(inst, dst, src[0], src[1]); - break; + case BRW_OPCODE_BREAK: + brw_BREAK(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + brw_CONT(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + break; - case VS_OPCODE_PULL_CONSTANT_LOAD: - generate_pull_constant_load(inst, dst, src[0], src[1]); - break; + case BRW_OPCODE_WHILE: + brw_WHILE(p); + break; - case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); - break; + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (brw->gen >= 7) { + gen6_math(p, dst, brw_math_function(inst->opcode), src[0], + brw_null_reg()); + } else if (brw->gen == 6) { + generate_math_gen6(inst, dst, src[0], brw_null_reg()); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; - case GS_OPCODE_URB_WRITE: - generate_gs_urb_write(inst); - break; + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + if (brw->gen >= 7) { + gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); + } else if (brw->gen == 6) { + generate_math_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } + break; - case GS_OPCODE_THREAD_END: - generate_gs_thread_end(inst); - break; + case SHADER_OPCODE_TEX: + case SHADER_OPCODE_TXD: + case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_MCS: + case SHADER_OPCODE_TXL: + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + generate_tex(inst, dst, src[0], src[1]); + break; - case GS_OPCODE_SET_WRITE_OFFSET: - generate_gs_set_write_offset(dst, src[0], src[1]); - break; + case VS_OPCODE_URB_WRITE: + generate_vs_urb_write(inst); + break; - case GS_OPCODE_SET_VERTEX_COUNT: - generate_gs_set_vertex_count(dst, src[0]); - break; + case SHADER_OPCODE_GEN4_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; - case GS_OPCODE_SET_DWORD_2_IMMED: - generate_gs_set_dword_2_immed(dst, src[0]); - break; + case SHADER_OPCODE_GEN4_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; - case GS_OPCODE_PREPARE_CHANNEL_MASKS: - generate_gs_prepare_channel_masks(dst); - break; + case VS_OPCODE_PULL_CONSTANT_LOAD: + generate_pull_constant_load(inst, dst, src[0], src[1]); + break; - case GS_OPCODE_SET_CHANNEL_MASKS: - generate_gs_set_channel_masks(dst, src[0]); - break; + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); + break; - case GS_OPCODE_GET_INSTANCE_ID: - generate_gs_get_instance_id(dst); - break; + case GS_OPCODE_URB_WRITE: + generate_gs_urb_write(inst); + break; - case SHADER_OPCODE_SHADER_TIME_ADD: - brw_shader_time_add(p, src[0], - prog_data->base.binding_table.shader_time_start); - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.shader_time_start); - break; + case GS_OPCODE_THREAD_END: + generate_gs_thread_end(inst); + break; - case SHADER_OPCODE_UNTYPED_ATOMIC: - generate_untyped_atomic(inst, dst, src[0], src[1]); - break; + case GS_OPCODE_SET_WRITE_OFFSET: + generate_gs_set_write_offset(dst, src[0], src[1]); + break; - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - generate_untyped_surface_read(inst, dst, src[0]); - break; + case GS_OPCODE_SET_VERTEX_COUNT: + generate_gs_set_vertex_count(dst, src[0]); + break; - case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - generate_unpack_flags(inst, dst); - break; + case GS_OPCODE_SET_DWORD_2_IMMED: + generate_gs_set_dword_2_immed(dst, src[0]); + break; - default: - if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { - _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", - opcode_descs[inst->opcode].name); - } else { - _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); - } - abort(); - } -} + case GS_OPCODE_PREPARE_CHANNEL_MASKS: + generate_gs_prepare_channel_masks(dst); + break; -void -vec4_generator::generate_code(const cfg_t *cfg) -{ - struct annotation_info annotation; - memset(&annotation, 0, sizeof(annotation)); + case GS_OPCODE_SET_CHANNEL_MASKS: + generate_gs_set_channel_masks(dst, src[0]); + break; - foreach_block_and_inst (block, vec4_instruction, inst, cfg) { - struct brw_reg src[3], dst; + case GS_OPCODE_GET_INSTANCE_ID: + generate_gs_get_instance_id(dst); + break; - if (unlikely(debug_flag)) - annotate(brw, &annotation, cfg, inst, p->next_insn_offset); + case SHADER_OPCODE_SHADER_TIME_ADD: + brw_shader_time_add(p, src[0], + prog_data->base.binding_table.shader_time_start); + brw_mark_surface_used(&prog_data->base, + prog_data->base.binding_table.shader_time_start); + break; - for (unsigned int i = 0; i < 3; i++) { - src[i] = inst->get_src(this->prog_data, i); - } - dst = inst->get_dst(); + case SHADER_OPCODE_UNTYPED_ATOMIC: + generate_untyped_atomic(inst, dst, src[0], src[1]); + break; - brw_set_default_predicate_control(p, inst->predicate); - brw_set_default_predicate_inverse(p, inst->predicate_inverse); - brw_set_default_saturate(p, inst->saturate); - brw_set_default_mask_control(p, inst->force_writemask_all); - brw_set_default_acc_write_control(p, inst->writes_accumulator); + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + generate_untyped_surface_read(inst, dst, src[0]); + break; - unsigned pre_emit_nr_insn = p->nr_insn; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: + generate_unpack_flags(inst, dst); + break; - generate_vec4_instruction(inst, dst, src); + default: + if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { + _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", + opcode_descs[inst->opcode].name); + } else { + _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); + } + abort(); + } if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { assert(p->nr_insn == pre_emit_nr_insn + 1 || |