From 546425726387ffceb71989e08028c386d21dedfd Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 31 Jul 2014 01:26:30 -0700 Subject: i965: Micro-optimize swizzle_to_scs() and make it inlinable. brw_swizzle_to_scs has been showing up in my CPU profiling, which is rather silly - it's a tiny amount of code. It really should be inlined, and can easily be implemented with fewer instructions. The enum translation is as follows: SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE 0 1 2 3 4 5 4 5 6 7 0 1 SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE which is simply (swizzle + 4) & 7. Haswell needs extra textureGather workarounds to remap GREEN to BLUE, but Broadwell and later do not. This patch replicates swizzle_to_scs in gen7_wm_surface_state.c and gen8_surface_state.c, since the Gen8+ code can be simplified to a mere two instructions. Both copies can be marked static for easy inlining. v2: Put the commit message in the code as comments (requested by Jason Ekstrand). Also fix a typo. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_state.h | 1 - src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 40 ++++++++++------------- src/mesa/drivers/dri/i965/gen8_surface_state.c | 25 +++++++++++--- 3 files changed, 39 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 399347c15fe..f195407e882 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -225,7 +225,6 @@ int brw_get_texture_swizzle(const struct gl_context *ctx, const struct gl_texture_object *t); /* gen7_wm_surface_state.c */ -unsigned brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue); uint32_t gen7_surface_tiling_mode(uint32_t tiling); uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l); void gen7_set_surface_mcs_info(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index c257cb79c30..e2c347a51ee 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -39,27 +39,23 @@ /** * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+ - * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED) + * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are + * + * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE + * 0 1 2 3 4 5 + * 4 5 6 7 0 1 + * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE + * + * which is simply adding 4 then modding by 8 (or anding with 7). + * + * We then may need to apply workarounds for textureGather hardware bugs. */ -unsigned -brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) +static unsigned +swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) { - switch (swizzle) { - case SWIZZLE_X: - return HSW_SCS_RED; - case SWIZZLE_Y: - return need_green_to_blue ? HSW_SCS_BLUE : HSW_SCS_GREEN; - case SWIZZLE_Z: - return HSW_SCS_BLUE; - case SWIZZLE_W: - return HSW_SCS_ALPHA; - case SWIZZLE_ZERO: - return HSW_SCS_ZERO; - case SWIZZLE_ONE: - return HSW_SCS_ONE; - } + unsigned scs = (swizzle + 4) & 7; - unreachable("Should not get here: invalid swizzle mode"); + return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs; } uint32_t @@ -353,10 +349,10 @@ gen7_update_texture_surface(struct gl_context *ctx, const bool need_scs_green_to_blue = for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD; surf[7] |= - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A); + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A); } if (mt->mcs_mt) { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 56c46b0469a..d1b095cf535 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -38,6 +38,23 @@ #include "brw_defines.h" #include "brw_wm.h" +/** + * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+ + * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are + * + * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE + * 0 1 2 3 4 5 + * 4 5 6 7 0 1 + * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE + * + * which is simply adding 4 then modding by 8 (or anding with 7). + */ +static unsigned +swizzle_to_scs(unsigned swizzle) +{ + return (swizzle + 4) & 7; +} + static uint32_t surface_tiling_mode(uint32_t tiling) { @@ -231,10 +248,10 @@ gen8_update_texture_surface(struct gl_context *ctx, const int swizzle = unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj); surf[7] |= - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), false), GEN7_SURFACE_SCS_R) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), false), GEN7_SURFACE_SCS_G) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), false), GEN7_SURFACE_SCS_B) | - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), false), GEN7_SURFACE_SCS_A); + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) | + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A); *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */ -- cgit v1.2.3