From c9e48e5b083b6cf97ecdb2d17c874ea631203b06 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 1 Aug 2012 19:35:18 -0700
Subject: i965: Generalize VS compute-to-MRF for compute-to-another-GRF, too.

No statistically significant performance difference on glbenchmark 2.7
(n=60). It reduces cycles spent in the vertex shader by 3.3% +/- 0.8%
(n=5), but that's only about .3% of all cycles spent according to the
fixed shader_time.

Reviewed-by: Kenneth Graunke
---
 .../dri/i965/test_vec4_register_coalesce.cpp | 58 +++++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

(limited to 'src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp')

diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index fa9c155655f..45be376fed2 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -70,7 +70,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
       v->dump_instructions();
    }
 
-   v->opt_compute_to_mrf();
+   v->opt_register_coalesce();
 
    if (print) {
       printf("%s: instructions after:\n", func);
@@ -78,7 +78,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
    }
 }
 
-TEST_F(register_coalesce_test, test_easy_success)
+TEST_F(register_coalesce_test, test_compute_to_mrf)
 {
    src_reg something = src_reg(v, glsl_type::float_type);
    dst_reg temp = dst_reg(v, glsl_type::float_type);
@@ -143,3 +143,57 @@ TEST_F(register_coalesce_test, test_dp4_mrf)
    EXPECT_EQ(dp4->dst.file, MRF);
    EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
 }
+
+TEST_F(register_coalesce_test, test_dp4_grf)
+{
+   src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+   src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+   dst_reg init;
+
+   dst_reg to = dst_reg(v, glsl_type::vec4_type);
+   dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+   vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
+   to.writemask = WRITEMASK_Y;
+   v->emit(v->MOV(to, src_reg(temp)));
+
+   /* if we don't do something with the result, the automatic dead code
+    * elimination will remove all our instructions.
+    */
+   src_reg src = src_reg(to);
+   src.negate = true;
+   v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+   register_coalesce(v);
+
+   EXPECT_EQ(dp4->dst.reg, to.reg);
+   EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
+}
+
+TEST_F(register_coalesce_test, test_channel_mul_grf)
+{
+   src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
+   src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
+   dst_reg init;
+
+   dst_reg to = dst_reg(v, glsl_type::vec4_type);
+   dst_reg temp = dst_reg(v, glsl_type::float_type);
+
+   vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
+   to.writemask = WRITEMASK_Y;
+   v->emit(v->MOV(to, src_reg(temp)));
+
+   /* if we don't do something with the result, the automatic dead code
+    * elimination will remove all our instructions.
+    */
+   src_reg src = src_reg(to);
+   src.negate = true;
+   v->emit(v->MOV(dst_reg(MRF, 0), src));
+
+   register_coalesce(v);
+
+   /* This path isn't supported yet in the reswizzling code, so we're checking
+    * that we haven't done anything bad to scalar non-DP[234]s.
+    */
+   EXPECT_NE(mul->dst.reg, to.reg);
+}
-- 
cgit v1.2.3