diff options
Diffstat (limited to 'src/mesa')
34 files changed, 1001 insertions, 138 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index effcb6c1c4a..fd5227d2c55 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -663,11 +663,14 @@ #define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ #define BRW_MESSAGE_TARGET_SAMPLER 2 #define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */ -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */ +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 #define BRW_MESSAGE_TARGET_URB 6 #define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 -#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */ + +#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4 +#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5 +#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9 #define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 #define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 @@ -749,7 +752,7 @@ #define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 #define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 #define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 #define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 #define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 @@ -760,7 +763,7 @@ #define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 #define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 #define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 -#define GEN6_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 #define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 #define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 #define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 111cb9974e1..af41c848308 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -967,12 +967,12 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) case BRW_MESSAGE_TARGET_DATAPORT_READ: if (gen >= 6) { format (file, " (%d, %d, %d, %d, %d, %d)", - inst->bits3.dp_render_cache.binding_table_index, - inst->bits3.dp_render_cache.msg_control, - inst->bits3.dp_render_cache.msg_type, - inst->bits3.dp_render_cache.send_commit_msg, - inst->bits3.dp_render_cache.msg_length, - inst->bits3.dp_render_cache.response_length); + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg, + inst->bits3.gen6_dp.msg_length, + inst->bits3.gen6_dp.response_length); } else if (gen >= 5 /* FINISHME: || is_g4x */) { format (file, " (%d, %d, %d)", inst->bits3.dp_read_gen5.binding_table_index, @@ -988,12 +988,12 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { format (file, " (%d, %d, %d, %d, %d, %d)", - inst->bits3.dp_render_cache.binding_table_index, - inst->bits3.dp_render_cache.msg_control, - inst->bits3.dp_render_cache.msg_type, - inst->bits3.dp_render_cache.send_commit_msg, - inst->bits3.dp_render_cache.msg_length, - inst->bits3.dp_render_cache.response_length); + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg, + inst->bits3.gen6_dp.msg_length, + inst->bits3.gen6_dp.response_length); } else { format (file, " (%d, %d, %d, %d)", inst->bits3.dp_write.binding_table_index, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 859068ec4eb..633cc03388a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -487,18 +487,18 @@ static void brw_set_dp_write_message( struct brw_context *brw, brw_set_src1(insn, brw_imm_ud(0)); if (intel->gen >= 6) { - insn->bits3.dp_render_cache.binding_table_index = binding_table_index; - insn->bits3.dp_render_cache.msg_control = msg_control; - insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_render_cache.msg_type = msg_type; - insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; - insn->bits3.dp_render_cache.header_present = header_present; - insn->bits3.dp_render_cache.response_length = response_length; - insn->bits3.dp_render_cache.msg_length = msg_length; - insn->bits3.dp_render_cache.end_of_thread = end_of_thread; + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; + insn->bits3.gen6_dp.header_present = header_present; + insn->bits3.gen6_dp.response_length = response_length; + insn->bits3.gen6_dp.msg_length = msg_length; + insn->bits3.gen6_dp.end_of_thread = end_of_thread; /* We always use the render cache for write messages */ - insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; } else if (intel->gen == 5) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; @@ -541,19 +541,19 @@ brw_set_dp_read_message(struct brw_context *brw, uint32_t target_function; if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) - target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */ + target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE; else - target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */ - - insn->bits3.dp_render_cache.binding_table_index = binding_table_index; - insn->bits3.dp_render_cache.msg_control = msg_control; - insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0; - insn->bits3.dp_render_cache.msg_type = msg_type; - insn->bits3.dp_render_cache.send_commit_msg = 0; - insn->bits3.dp_render_cache.header_present = 1; - insn->bits3.dp_render_cache.response_length = response_length; - insn->bits3.dp_render_cache.msg_length = msg_length; - insn->bits3.dp_render_cache.end_of_thread = 0; + target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; + + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.pixel_scoreboard_clear = 0; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = 0; + insn->bits3.gen6_dp.header_present = 1; + insn->bits3.gen6_dp.response_length = response_length; + insn->bits3.gen6_dp.msg_length = msg_length; + insn->bits3.gen6_dp.end_of_thread = 0; insn->header.destreg__conditionalmod = target_function; } else if (intel->gen == 5) { insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; @@ -1711,7 +1711,7 @@ void brw_oword_block_read(struct brw_compile *p, bind_table_index, BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1, /* msg_length */ 1); /* response_length (1 reg, 2 owords!) */ @@ -1752,7 +1752,7 @@ void brw_dword_scattered_read(struct brw_compile *p, bind_table_index, BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* msg_length */ 1); /* response_length */ } @@ -1806,7 +1806,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, bind_table_index, 0, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1, /* msg_length */ 1); /* response_length (1 Oword) */ } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8d4797fb675..6132bb91abe 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1704,7 +1704,7 @@ struct brw_instruction GLuint msg_length:4; GLuint pad1:2; GLuint end_of_thread:1; - } dp_render_cache; + } gen6_dp; struct { GLuint function_control:16; diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 5c9f57b4eac..4bedfacd632 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -74,6 +74,9 @@ tags: clean: rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak +test: default + @$(MAKE) -s -C tests/ + # Dummy target install: @echo -n "" diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c index 76c7c60d8f5..ddcdddf9e3c 100644 --- a/src/mesa/drivers/dri/r300/compiler/memory_pool.c +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -28,7 +28,7 @@ #define POOL_LARGE_ALLOC 4096 -#define POOL_ALIGN 4 +#define POOL_ALIGN 8 struct memory_block { diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index e2441e97d87..bb6c010e8e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,8 +109,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { 0, 0 } }; - struct radeon_program_transformation native_rewrite_r500[] = { + struct radeon_program_transformation rewrite_if[] = { { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 }, @@ -135,6 +139,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 5e0be6b8881..cf99f5e4538 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -30,6 +30,8 @@ #include <stdio.h> #include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" #include "../r300_reg.h" /** @@ -37,27 +39,143 @@ */ int r500_transform_IF( struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) + struct rc_instruction * inst_if, + void *data) { - struct rc_instruction * inst_mov; + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; - if (inst->U.I.Opcode != RC_OPCODE_IF) + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.WriteMask = 0; - inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; - inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); - - inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; - inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[0].Negate = 0; + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 1e665e27641..6aa448cc6f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -44,7 +44,7 @@ extern struct rc_swizzle_caps r500_swizzle_caps; extern int r500_transform_IF( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * inst_if, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index b077e7b7d65..c8001ba8c18 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -359,53 +359,71 @@ unsigned int rc_source_type_mask(unsigned int mask) return ret; } +struct src_select { + rc_register_file File; + int Index; + unsigned int SrcType; +}; + struct can_use_presub_data { - struct rc_src_register RemoveSrcs[3]; - unsigned int RGBCount; - unsigned int AlphaCount; + struct src_select Selects[5]; + unsigned int SelectCount; + const struct rc_src_register * ReplaceReg; + unsigned int ReplaceRemoved; }; +static void can_use_presub_data_add_select( + struct can_use_presub_data * data, + rc_register_file file, + unsigned int index, + unsigned int src_type) +{ + struct src_select * select; + + select = &data->Selects[data->SelectCount++]; + select->File = file; + select->Index = index; + select->SrcType = src_type; +} + +/** + * This callback function counts the number of sources in inst that are + * different from the sources in can_use_presub_data->RemoveSrcs. + */ static void can_use_presub_read_cb( void * userdata, struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) + struct rc_src_register * src) { struct can_use_presub_data * d = userdata; - unsigned int src_type = rc_source_type_mask(mask); - unsigned int i; - if (file == RC_FILE_NONE) + if (!d->ReplaceRemoved && src == d->ReplaceReg) { + d->ReplaceRemoved = 1; return; - - for(i = 0; i < 3; i++) { - if (d->RemoveSrcs[i].File == file - && d->RemoveSrcs[i].Index == index) { - src_type &= - ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle); - } } - if (src_type & RC_SOURCE_RGB) - d->RGBCount++; + if (src->File == RC_FILE_NONE) + return; - if (src_type & RC_SOURCE_ALPHA) - d->AlphaCount++; + can_use_presub_data_add_select(d, src->File, src->Index, + rc_source_type_swz(src->Swizzle)); } unsigned int rc_inst_can_use_presub( struct rc_instruction * inst, rc_presubtract_op presub_op, unsigned int presub_writemask, - struct rc_src_register replace_reg, - struct rc_src_register presub_src0, - struct rc_src_register presub_src1) + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1) { struct can_use_presub_data d; unsigned int num_presub_srcs; + unsigned int i; const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + int rgb_count = 0, alpha_count = 0; + unsigned int src_type0, src_type1; if (presub_op == RC_PRESUB_NONE) { return 1; @@ -425,15 +443,62 @@ unsigned int rc_inst_can_use_presub( } memset(&d, 0, sizeof(d)); - d.RemoveSrcs[0] = replace_reg; - d.RemoveSrcs[1] = presub_src0; - d.RemoveSrcs[2] = presub_src1; + d.ReplaceReg = replace_reg; - rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) { + src_type0 = rc_source_type_swz(presub_src0->Swizzle); + can_use_presub_data_add_select(&d, + presub_src0->File, + presub_src0->Index, + src_type0); + + if (num_presub_srcs > 1) { + src_type1 = rc_source_type_swz(presub_src1->Swizzle); + can_use_presub_data_add_select(&d, + presub_src1->File, + presub_src1->Index, + src_type1); + + /* Even if both of the presub sources read from the same + * register, we still need to use 2 different source selects + * for them, so we need to increment the count to compensate. + */ + if (presub_src0->File == presub_src1->File + && presub_src0->Index == presub_src1->Index) { + if (src_type0 & src_type1 & RC_SOURCE_RGB) { + rgb_count++; + } + if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + } + + /* Count the number of source selects for Alpha and RGB. If we + * encounter two of the same source selects then we can ignore the + * first one. */ + for (i = 0; i < d.SelectCount; i++) { + unsigned int j; + unsigned int src_type = d.Selects[i].SrcType; + for (j = i + 1; j < d.SelectCount; j++) { + if (d.Selects[i].File == d.Selects[j].File + && d.Selects[i].Index == d.Selects[j].Index) { + src_type &= ~d.Selects[j].SrcType; + } + } + if (src_type & RC_SOURCE_RGB) { + rgb_count++; + } + + if (src_type & RC_SOURCE_ALPHA) { + alpha_count++; + } + } + + if (rgb_count > 3 || alpha_count > 3) { return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 2af289dfabd..3730aa888c0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -63,9 +63,9 @@ unsigned int rc_inst_can_use_presub( struct rc_instruction * inst, rc_presubtract_op presub_op, unsigned int presub_writemask, - struct rc_src_register replace_reg, - struct rc_src_register presub_src0, - struct rc_src_register presub_src1); + const struct rc_src_register * replace_reg, + const struct rc_src_register * presub_src0, + const struct rc_src_register * presub_src1); int rc_get_max_index( struct radeon_compiler * c, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 5b4fba80873..ac73608839e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -70,9 +70,9 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, if(!rc_inst_can_use_presub(inst, reader_data->Writer->U.I.PreSub.Opcode, rc_swizzle_to_writemask(src->Swizzle), - *src, - reader_data->Writer->U.I.PreSub.SrcReg[0], - reader_data->Writer->U.I.PreSub.SrcReg[1])) { + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -437,9 +437,9 @@ static void presub_scan_read( if (!rc_inst_can_use_presub(inst, *presub_opcode, reader_data->Writer->U.I.DstReg.WriteMask, - *src, - reader_data->Writer->U.I.SrcReg[0], - reader_data->Writer->U.I.SrcReg[1])) { + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -517,8 +517,11 @@ static int is_presub_candidate( assert(inst->U.I.Opcode == RC_OPCODE_ADD); - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode) + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { return 0; + } /* If both sources use a constant swizzle, then we can't convert it to * a presubtract operation. In fact for the ADD and SUB presubtract diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c index 16fa5d28902..33181bdcc88 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c @@ -388,17 +388,21 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c) */ while (aborted_list) { struct rc_list * search_ptr_next; + struct rc_variable * var; var_ptr = aborted_list; + for (var = var_ptr->Item; var; var = var->Friend) { - search_ptr = var_ptr->Next; - while(search_ptr) { - search_ptr_next = search_ptr->Next; - if (readers_intersect(var_ptr->Item, search_ptr->Item)){ - rc_list_remove(&aborted_list, search_ptr); - rc_variable_add_friend(var_ptr->Item, + search_ptr = var_ptr->Next; + while(search_ptr) { + search_ptr_next = search_ptr->Next; + if (readers_intersect(var, search_ptr->Item)){ + rc_list_remove(&aborted_list, + search_ptr); + rc_variable_add_friend(var, search_ptr->Item); + } + search_ptr = search_ptr_next; } - search_ptr = search_ptr_next; } rc_list_remove(&aborted_list, var_ptr); rc_list_add(&variable_list, rc_list( @@ -465,6 +469,67 @@ struct rc_list * rc_variable_readers_union(struct rc_variable * var) return list; } +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have indentifed the variable and its + * friends that write this source, we can stop + * stop searching, because we know know of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + void rc_variable_print(struct rc_variable * var) { unsigned int i; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h index b8fbcaa4029..9427bee18a7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h @@ -79,6 +79,11 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var); struct rc_list * rc_variable_readers_union(struct rc_variable * var); +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + void rc_variable_print(struct rc_variable * var); #endif /* RADEON_VARIABLE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/tests/Makefile b/src/mesa/drivers/dri/r300/compiler/tests/Makefile new file mode 100644 index 00000000000..e268543e6e7 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/Makefile @@ -0,0 +1,55 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../../.. +include $(TOP)/configs/current + +CFLAGS += -Wall -Werror + +### Basic defines ### +TESTS = radeon_compiler_util_tests + +TEST_SOURCES := $(TESTS:=.c) + +SHARED_SOURCES = \ + rc_test_helpers.c \ + unit_test.c + +C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) + +INCLUDES = \ + -I. \ + -I.. + +COMPILER_LIB = ../libr300compiler.a + +##### TARGETS ##### + +default: depend run_tests + +depend: $(C_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(TESTS) depend depend.bak + +$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) + $(APP_CC) -o $@ $^ + +run_tests: $(TESTS) + @echo "RUNNING TESTS:" + @echo "" + $(foreach test, $^, @./$(test)) + +.PHONY: $(COMPILER_LIB) +$(COMPILER_LIB): + $(MAKE) -C .. + +##### RULES ##### +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c b/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c new file mode 100644 index 00000000000..a2e3f2ab2e5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c @@ -0,0 +1,76 @@ +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "radeon_compiler_util.h" +#include "radeon_program.h" + +#include "rc_test_helpers.h" +#include "unit_test.h" + +static void test_rc_inst_can_use_presub( + struct test_result * result, + int expected, + const char * add_str, + const char * replace_str) +{ + struct rc_instruction add_inst, replace_inst; + int ret; + + test_begin(result); + init_rc_normal_instruction(&add_inst, add_str); + init_rc_normal_instruction(&replace_inst, replace_str); + + ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, + &replace_inst.U.I.SrcReg[0], + &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); + + test_check(result, ret == expected); +} + +static void test_runner_rc_inst_can_use_presub(struct test_result * result) +{ + + /* This tests the case where the source being replace has the same + * register file and register index as another source register in the + * CMP instruction. A previous version of this function was ignoring + * all registers that shared the same file and index as the replacement + * register when counting the number of source selects. + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[0].z, temp[6].__x_, const[1].__x_;", + "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); + + + /* Testing a random case that should fail + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3], temp[1], temp[2];", + "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); + + /* This tests the case where the arguments of the ADD + * instruction share the same register file and index. Normally, we + * would need only one source select for these two arguments, but since + * they will be part of a presubtract operation we need to use the two + * source selects that the presubtract instruction expects + * (src0 and src1). + * + * https://bugs.freedesktop.org/show_bug.cgi?id=36527 + */ + test_rc_inst_can_use_presub(result, 0, + "ADD temp[3].x, temp[0].x___, temp[0].x___;", + "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); +} + +int main(int argc, char ** argv) +{ + struct test tests[] = { + {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, + {NULL, NULL} + }; + run_tests(tests); +} diff --git a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c new file mode 100644 index 00000000000..ca4738af54d --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c @@ -0,0 +1,380 @@ +#include <errno.h> +#include <regex.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "../radeon_compiler_util.h" +#include "../radeon_opcodes.h" +#include "../radeon_program.h" + +#include "rc_test_helpers.h" + +/* This file contains some helper functions for filling out the rc_instruction + * data structures. These functions take a string as input based on the format + * output by rc_program_print(). + */ + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +#define REGEX_ERR_BUF_SIZE 50 + +struct match_info { + const char * String; + int Length; +}; + +static int match_length(regmatch_t * matches, int index) +{ + return matches[index].rm_eo - matches[index].rm_so; +} + +static int regex_helper( + const char * regex_str, + const char * search_str, + regmatch_t * matches, + int num_matches) +{ + char err_buf[REGEX_ERR_BUF_SIZE]; + regex_t regex; + int err_code; + unsigned int i; + + err_code = regcomp(®ex, regex_str, REG_EXTENDED); + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to compile regex: %s\n", err_buf); + return 0; + } + + err_code = regexec(®ex, search_str, num_matches, matches, 0); + DBG("Search string: '%s'\n", search_str); + for (i = 0; i < num_matches; i++) { + DBG("Match %u start = %d end = %d\n", i, + matches[i].rm_so, matches[i].rm_eo); + } + if (err_code) { + regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); + fprintf(stderr, "Failed to match regex: %s\n", err_buf); + return 0; + } + return 1; +} + +#define REGEX_SRC_MATCHES 6 + +struct src_tokens { + struct match_info Negate; + struct match_info Abs; + struct match_info File; + struct match_info Index; + struct match_info Swizzle; +}; + +/** + * Initialize the source register at index src_index for the instruction based + * on src_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param src_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str) +{ + const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; + regmatch_t matches[REGEX_SRC_MATCHES]; + struct src_tokens tokens; + struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { + fprintf(stderr, "Failed to execute regex for src register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.Negate.String = src_str + matches[1].rm_so; + tokens.Negate.Length = match_length(matches, 1); + tokens.Abs.String = src_str + matches[2].rm_so; + tokens.Abs.Length = match_length(matches, 2); + tokens.File.String = src_str + matches[3].rm_so; + tokens.File.Length = match_length(matches, 3); + tokens.Index.String = src_str + matches[4].rm_so; + tokens.Index.Length = match_length(matches, 4); + tokens.Swizzle.String = src_str + matches[5].rm_so; + tokens.Swizzle.Length = match_length(matches, 5); + + /* Negate */ + if (tokens.Negate.Length > 0) { + src_reg->Negate = RC_MASK_XYZW; + } + + /* Abs */ + if (tokens.Abs.Length > 0) { + src_reg->Abs = 1; + } + + /* File */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + src_reg->File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { + src_reg->File = RC_FILE_INPUT; + } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { + src_reg->File = RC_FILE_CONSTANT; + } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { + src_reg->File = RC_FILE_NONE; + } + + /* Index */ + errno = 0; + src_reg->Index = strtol(tokens.Index.String, NULL, 10); + if (errno > 0) { + fprintf(stderr, "Could not convert src register index.\n"); + return 0; + } + + /* Swizzle */ + if (tokens.Swizzle.Length == 0) { + src_reg->Swizzle = RC_SWIZZLE_XYZW; + } else { + int str_index = 1; + src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); + if (tokens.Swizzle.String[0] != '.') { + fprintf(stderr, "First char of swizzle is not valid.\n"); + return 0; + } + for (i = 0; i < 4; i++, str_index++) { + if (tokens.Swizzle.String[str_index] == '-') { + src_reg->Negate |= (1 << i); + str_index++; + } + switch(tokens.Swizzle.String[str_index]) { + case 'x': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); + break; + case 'y': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); + break; + case 'z': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); + break; + case 'w': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); + break; + case '1': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); + break; + case '0': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); + break; + case 'H': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); + break; + case '_': + SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); + break; + default: + fprintf(stderr, "Unknown src register swizzle.\n"); + return 0; + } + } + } + DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", + src_reg->File, src_reg->Index, src_reg->Swizzle, + src_reg->Negate, src_reg->Abs); + return 1; +} + +#define REGEX_DST_MATCHES 4 + +struct dst_tokens { + struct match_info File; + struct match_info Index; + struct match_info WriteMask; +}; + +/** + * Initialize the destination for the instruction based on dst_str. + * + * NOTE: Warning in init_rc_normal_instruction() applies to this function as + * well. + * + * @param dst_str A string that represents the destination register. The format + * for this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str) +{ + const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; + regmatch_t matches[REGEX_DST_MATCHES]; + struct dst_tokens tokens; + unsigned int i; + + /* Execute the regex */ + if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { + fprintf(stderr, "Failed to execute regex for dst register.\n"); + return 0; + } + + /* Create Tokens */ + tokens.File.String = dst_str + matches[1].rm_so; + tokens.File.Length = match_length(matches, 1); + tokens.Index.String = dst_str + matches[2].rm_so; + tokens.Index.Length = match_length(matches, 2); + tokens.WriteMask.String = dst_str + matches[3].rm_so; + tokens.WriteMask.Length = match_length(matches, 3); + + /* File Type */ + if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + } else { + fprintf(stderr, "Unknown dst register file type.\n"); + return 0; + } + + /* File Index */ + errno = 0; + inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); + + if (errno > 0) { + fprintf(stderr, "Could not convert dst register index\n"); + return 0; + } + + /* WriteMask */ + if (tokens.WriteMask.Length == 0) { + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } else { + /* The first character should be '.' */ + if (tokens.WriteMask.String[0] != '.') { + fprintf(stderr, "1st char of writemask is not valid.\n"); + return 0; + } + for (i = 1; i < tokens.WriteMask.Length; i++) { + switch(tokens.WriteMask.String[i]) { + case 'x': + inst->U.I.DstReg.WriteMask |= RC_MASK_X; + break; + case 'y': + inst->U.I.DstReg.WriteMask |= RC_MASK_Y; + break; + case 'z': + inst->U.I.DstReg.WriteMask |= RC_MASK_Z; + break; + case 'w': + inst->U.I.DstReg.WriteMask |= RC_MASK_W; + break; + default: + fprintf(stderr, "Unknown swizzle in writemask.\n"); + return 0; + } + } + } + DBG("Dst Reg File=%u Index=%d Writemask=%d\n", + inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask); + return 1; +} + +#define REGEX_INST_MATCHES 7 + +struct inst_tokens { + struct match_info Opcode; + struct match_info Sat; + struct match_info Dst; + struct match_info Srcs[3]; +}; + +/** + * Initialize a normal instruction based on inst_str. + * + * WARNING: This function might not be able to handle every kind of format that + * rc_program_print() can output. If you are having problems with a + * particular string, you may need to add support for it to this functions. + * + * @param inst_str A string that represents the source register. The format for + * this string is the same that is output by rc_program_print. + * @return 1 On success, 0 on failure + */ +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str) +{ + const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; + int i; + regmatch_t matches[REGEX_INST_MATCHES]; + struct inst_tokens tokens; + + /* Initialize inst */ + memset(inst, 0, sizeof(struct rc_instruction)); + inst->Type = RC_INSTRUCTION_NORMAL; + + /* Execute the regex */ + if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { + return 0; + } + memset(&tokens, 0, sizeof(tokens)); + + /* Create Tokens */ + tokens.Opcode.String = inst_str + matches[1].rm_so; + tokens.Opcode.Length = match_length(matches, 1); + if (matches[2].rm_so > -1) { + tokens.Sat.String = inst_str + matches[2].rm_so; + tokens.Sat.Length = match_length(matches, 2); + } + + + /* Fill out the rest of the instruction. */ + for (i = 0; i < MAX_RC_OPCODE; i++) { + const struct rc_opcode_info * info = rc_get_opcode_info(i); + unsigned int first_src = 3; + unsigned int j; + if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { + continue; + } + inst->U.I.Opcode = info->Opcode; + if (info->HasDstReg) { + char * dst_str; + tokens.Dst.String = inst_str + matches[3].rm_so; + tokens.Dst.Length = match_length(matches, 3); + first_src++; + + dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); + strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); + dst_str[tokens.Dst.Length] = '\0'; + init_rc_normal_dst(inst, dst_str); + free(dst_str); + } + for (j = 0; j < info->NumSrcRegs; j++) { + char * src_str; + tokens.Srcs[j].String = + inst_str + matches[first_src + j].rm_so; + tokens.Srcs[j].Length = + match_length(matches, first_src + j); + + src_str = malloc(sizeof(char) * + (tokens.Srcs[j].Length + 1)); + strncpy(src_str, tokens.Srcs[j].String, + tokens.Srcs[j].Length); + src_str[tokens.Srcs[j].Length] = '\0'; + init_rc_normal_src(inst, j, src_str); + } + break; + } + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h new file mode 100644 index 00000000000..1a6bf9699ba --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h @@ -0,0 +1,13 @@ + +int init_rc_normal_src( + struct rc_instruction * inst, + unsigned int src_index, + const char * src_str); + +int init_rc_normal_dst( + struct rc_instruction * inst, + const char * dst_str); + +int init_rc_normal_instruction( + struct rc_instruction * inst, + const char * inst_str); diff --git a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c new file mode 100644 index 00000000000..266f3365c58 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "unit_test.h" + +void run_tests(struct test tests[]) +{ + int i; + for (i = 0; tests[i].name; i++) { + printf("Test %s\n", tests[i].name); + memset(&tests[i].result, 0, sizeof(tests[i].result)); + tests[i].test_func(&tests[i].result); + printf("Test %s (%d/%d) pass\n", tests[i].name, + tests[i].result.pass, tests[i].result.test_count); + } +} + +void test_begin(struct test_result * result) +{ + result->test_count++; +} + +void test_check(struct test_result * result, int cond) +{ + printf("Subtest %u -> ", result->test_count); + if (cond) { + result->pass++; + printf("Pass"); + } else { + result->fail++; + printf("Fail"); + } + printf("\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h new file mode 100644 index 00000000000..441e8b655a5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h @@ -0,0 +1,17 @@ + +struct test_result { + unsigned int test_count; + unsigned int pass; + unsigned int fail; +}; + +struct test { + const char * name; + void (*test_func)(struct test_result * result); + struct test_result result; +}; + +void run_tests(struct test tests[]); + +void test_begin(struct test_result * result); +void test_check(struct test_result * result, int cond); diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 14e60866d93..c525f0ffe2f 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -118,6 +118,7 @@ static void create_fragment_program(struct r300_context *r300) inst->U.I.SrcReg[0].Negate = 0; inst->U.I.SrcReg[0].RelAddr = 0; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.TexSwizzle = RC_SWIZZLE_XYZW; compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); compiler.OutputColor[0] = FRAG_RESULT_COLOR; diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index 232603ece59..b1dfccd22d0 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -170,6 +170,7 @@ static void translate_instruction(struct radeon_compiler * c, dest->U.I.TexSrcUnit = src->TexSrcUnit; dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); dest->U.I.TexShadow = src->TexShadow; + dest->U.I.TexSwizzle = RC_SWIZZLE_XYZW; } } diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index e17fd0ff6fd..743841be4ef 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -355,7 +355,7 @@ static INLINE GLuint CPU_TO_LE32(GLuint x) #ifndef M_PI -#define M_PI (3.1415926536) +#define M_PI (3.14159265358979323846) #endif #ifndef M_E @@ -366,10 +366,6 @@ static INLINE GLuint CPU_TO_LE32(GLuint x) #define M_LOG2E (1.4426950408889634074) #endif -#ifndef ONE_DIV_LN2 -#define ONE_DIV_LN2 (1.442695040888963456) -#endif - #ifndef ONE_DIV_SQRT_LN2 #define ONE_DIV_SQRT_LN2 (1.201122408786449815) #endif diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 63653df4502..930236c0161 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -9784,9 +9784,9 @@ _mesa_create_save_table(void) SET_DeleteProgramsNV(table, _mesa_DeletePrograms); SET_GenProgramsNV(table, _mesa_GenPrograms); SET_IsProgramNV(table, _mesa_IsProgramARB); - SET_GetVertexAttribdvNV(table, _mesa_GetVertexAttribdvNV); - SET_GetVertexAttribfvNV(table, _mesa_GetVertexAttribfvNV); - SET_GetVertexAttribivNV(table, _mesa_GetVertexAttribivNV); + SET_GetVertexAttribdvARB(table, _mesa_GetVertexAttribdvARB); + SET_GetVertexAttribfvARB(table, _mesa_GetVertexAttribfvARB); + SET_GetVertexAttribivARB(table, _mesa_GetVertexAttribivARB); SET_GetVertexAttribPointervNV(table, _mesa_GetVertexAttribPointervNV); SET_ProgramEnvParameter4dARB(table, save_ProgramEnvParameter4dARB); SET_ProgramEnvParameter4dvARB(table, save_ProgramEnvParameter4dvARB); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index a9ef8fa4cb7..8672ac2a730 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -112,6 +112,7 @@ static const struct extension extension_table[] = { { "GL_ARB_seamless_cube_map", o(ARB_seamless_cube_map), GL, 2009 }, { "GL_ARB_shader_objects", o(ARB_shader_objects), GL, 2002 }, { "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 }, + { "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 }, { "GL_ARB_shading_language_100", o(ARB_shading_language_100), GL, 2003 }, { "GL_ARB_shadow_ambient", o(ARB_shadow_ambient), GL, 2001 }, { "GL_ARB_shadow", o(ARB_shadow), GL, 2001 }, @@ -143,7 +144,6 @@ static const struct extension extension_table[] = { { "GL_ARB_vertex_shader", o(ARB_vertex_shader), GL, 2002 }, { "GL_ARB_vertex_type_2_10_10_10_rev", o(ARB_vertex_type_2_10_10_10_rev), GL, 2009 }, { "GL_ARB_window_pos", o(ARB_window_pos), GL, 2001 }, - /* EXT extensions */ { "GL_EXT_abgr", o(EXT_abgr), GL, 1995 }, { "GL_EXT_bgra", o(EXT_bgra), GL, 1995 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 29c8cfd2363..eb2efc89aed 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2793,6 +2793,7 @@ struct gl_extensions GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_objects; GLboolean ARB_shader_stencil_export; + GLboolean ARB_shader_texture_lod; GLboolean ARB_shading_language_100; GLboolean ARB_shadow; GLboolean ARB_shadow_ambient; diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index 229267f8c94..f7774fdd7cb 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -132,7 +132,7 @@ _mesa_init_sampler_object(struct gl_sampler_object *sampObj, GLuint name) sampObj->CompareMode = GL_NONE; sampObj->CompareFunc = GL_LEQUAL; sampObj->CompareFailValue = 0.0; - sampObj->sRGBDecode = GL_FALSE; + sampObj->sRGBDecode = GL_DECODE_EXT; sampObj->CubeMapSeamless = GL_FALSE; sampObj->DepthMode = 0; } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index f2d214f931b..fdf12817c9a 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -254,6 +254,7 @@ _mesa_copy_texture_object( struct gl_texture_object *dest, dest->Sampler.CompareFailValue = src->Sampler.CompareFailValue; dest->Sampler.CubeMapSeamless = src->Sampler.CubeMapSeamless; dest->Sampler.DepthMode = src->Sampler.DepthMode; + dest->Sampler.sRGBDecode = src->Sampler.sRGBDecode; dest->_MaxLevel = src->_MaxLevel; dest->_MaxLambda = src->_MaxLambda; dest->GenerateMipmap = src->GenerateMipmap; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index bc10b455b8b..510aeab82da 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1966,7 +1966,7 @@ ir_to_mesa_visitor::visit(ir_call *ir) void ir_to_mesa_visitor::visit(ir_texture *ir) { - src_reg result_src, coord, lod_info, projector; + src_reg result_src, coord, lod_info, projector, dx, dy; dst_reg result_dst, coord_dst; ir_to_mesa_instruction *inst = NULL; prog_opcode opcode = OPCODE_NOP; @@ -2008,6 +2008,12 @@ ir_to_mesa_visitor::visit(ir_texture *ir) lod_info = this->result; break; case ir_txd: + opcode = OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -2080,7 +2086,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir) coord_dst.writemask = WRITEMASK_XYZW; } - inst = emit(ir, opcode, result_dst, coord); + if (opcode == OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else + inst = emit(ir, opcode, result_dst, coord); if (ir->shadow_comparitor) inst->tex_shadow = GL_TRUE; diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 669d7102980..db2b594e753 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -154,9 +154,9 @@ typedef enum prog_opcode { OPCODE_ARR, /* 2 */ OPCODE_BGNLOOP, /* opt */ OPCODE_BGNSUB, /* opt */ - OPCODE_BRA, /* 2 X */ + OPCODE_BRA, /* 2 */ OPCODE_BRK, /* 2 opt */ - OPCODE_CAL, /* 2 2 X */ + OPCODE_CAL, /* 2 2 opt */ OPCODE_CMP, /* X X */ OPCODE_CONT, /* opt */ OPCODE_COS, /* X 2 X X */ @@ -168,7 +168,7 @@ typedef enum prog_opcode { OPCODE_DP4, /* X X X X X */ OPCODE_DPH, /* X X 1.1 */ OPCODE_DST, /* X X X X */ - OPCODE_ELSE, /* X */ + OPCODE_ELSE, /* opt */ OPCODE_EMIT_VERTEX,/* X */ OPCODE_END, /* X X X X opt */ OPCODE_END_PRIMITIVE,/* X */ @@ -176,16 +176,16 @@ typedef enum prog_opcode { OPCODE_ENDLOOP, /* opt */ OPCODE_ENDSUB, /* opt */ OPCODE_EX2, /* X X 2 X X */ - OPCODE_EXP, /* X X X */ + OPCODE_EXP, /* X X */ OPCODE_FLR, /* X X 2 X X */ OPCODE_FRC, /* X X 2 X X */ OPCODE_IF, /* opt */ - OPCODE_KIL, /* X */ + OPCODE_KIL, /* X X */ OPCODE_KIL_NV, /* X X */ OPCODE_LG2, /* X X 2 X X */ OPCODE_LIT, /* X X X X */ - OPCODE_LOG, /* X X X */ - OPCODE_LRP, /* X X X */ + OPCODE_LOG, /* X X */ + OPCODE_LRP, /* X X */ OPCODE_MAD, /* X X X X X */ OPCODE_MAX, /* X X X X X */ OPCODE_MIN, /* X X X X X */ @@ -196,8 +196,8 @@ typedef enum prog_opcode { OPCODE_NOISE3, /* X */ OPCODE_NOISE4, /* X */ OPCODE_NOT, /* */ - OPCODE_NRM3, /* X */ - OPCODE_NRM4, /* X */ + OPCODE_NRM3, /* */ + OPCODE_NRM4, /* */ OPCODE_OR, /* */ OPCODE_PK2H, /* X */ OPCODE_PK2US, /* X */ @@ -209,10 +209,10 @@ typedef enum prog_opcode { OPCODE_PUSHA, /* 3 */ OPCODE_RCC, /* 1.1 */ OPCODE_RCP, /* X X X X X */ - OPCODE_RET, /* 2 2 X */ + OPCODE_RET, /* 2 2 opt */ OPCODE_RFL, /* X X */ OPCODE_RSQ, /* X X X X X */ - OPCODE_SCS, /* X */ + OPCODE_SCS, /* X X */ OPCODE_SEQ, /* 2 X X */ OPCODE_SFL, /* 2 X */ OPCODE_SGE, /* X X X X X */ @@ -221,10 +221,10 @@ typedef enum prog_opcode { OPCODE_SLE, /* 2 X X */ OPCODE_SLT, /* X X X X X */ OPCODE_SNE, /* 2 X X */ - OPCODE_SSG, /* 2 */ + OPCODE_SSG, /* 2 X */ OPCODE_STR, /* 2 X */ OPCODE_SUB, /* X X 1.1 X X */ - OPCODE_SWZ, /* X X */ + OPCODE_SWZ, /* X X X */ OPCODE_TEX, /* X 3 X X */ OPCODE_TXB, /* X 3 X */ OPCODE_TXD, /* X X */ @@ -238,7 +238,7 @@ typedef enum prog_opcode { OPCODE_UP4UB, /* X */ OPCODE_X2D, /* X */ OPCODE_XOR, /* */ - OPCODE_XPD, /* X X X */ + OPCODE_XPD, /* X X */ MAX_OPCODE } gl_inst_opcode; diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index d94d7fe5dfb..16f9690e865 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -459,7 +459,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], value[0] = (ctx->Fog.End == ctx->Fog.Start) ? 1.0f : (GLfloat)(-1.0F / (ctx->Fog.End - ctx->Fog.Start)); value[1] = ctx->Fog.End * -value[0]; - value[2] = (GLfloat)(ctx->Fog.Density * ONE_DIV_LN2); + value[2] = (GLfloat)(ctx->Fog.Density * M_LOG2E); /* M_LOG2E == 1/ln(2) */ value[3] = (GLfloat)(ctx->Fog.Density * ONE_DIV_SQRT_LN2); return; diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c index a239da2c609..c72dfb23b4d 100644 --- a/src/mesa/program/programopt.c +++ b/src/mesa/program/programopt.c @@ -267,6 +267,11 @@ _mesa_append_fog_code(struct gl_context *ctx, return; } + if (!(fprog->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR))) { + /* program doesn't output color, so nothing to do */ + return; + } + /* Alloc storage for new instructions */ newInst = _mesa_alloc_instructions(newLen); if (!newInst) { @@ -407,6 +412,7 @@ _mesa_append_fog_code(struct gl_context *ctx, fprog->Base.Instructions = newInst; fprog->Base.NumInstructions = inst - newInst; fprog->Base.InputsRead |= FRAG_BIT_FOGC; + assert(fprog->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index e377861b8d2..9948f8d2bc5 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -39,6 +39,7 @@ #include "main/pack.h" #include "main/pbo.h" #include "main/texformat.h" +#include "main/teximage.h" #include "main/texstore.h" #include "program/program.h" #include "program/prog_print.h" @@ -419,10 +420,10 @@ make_texture(struct st_context *st, gl_format mformat; struct pipe_resource *pt; enum pipe_format pipeFormat; - GLenum baseFormat, intFormat; + GLenum baseInternalFormat, intFormat; - baseFormat = base_format(format); intFormat = internal_format(ctx, format, type); + baseInternalFormat = _mesa_base_tex_format(ctx, intFormat); mformat = st_ChooseTextureFormat_renderable(ctx, intFormat, format, type, GL_FALSE); @@ -465,7 +466,7 @@ make_texture(struct st_context *st, * the texture. We deal with that with texcoords. */ success = _mesa_texstore(ctx, 2, /* dims */ - baseFormat, /* baseInternalFormat */ + baseInternalFormat, /* baseInternalFormat */ mformat, /* gl_format */ dest, /* dest */ 0, 0, 0, /* dstX/Y/Zoffset */ diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index ad8e6bb9ec9..d3aebe526dd 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -603,4 +603,8 @@ void st_init_extensions(struct st_context *st) else if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; } + + if (screen->get_param(screen, PIPE_CAP_SM3)) { + ctx->Extensions.ARB_shader_texture_lod = GL_TRUE; + } } |