diff options
author | Brian Paul <[email protected]> | 2014-04-17 08:54:47 -0700 |
---|---|---|
committer | Brian Paul <[email protected]> | 2014-04-17 11:29:33 -0700 |
commit | 615a356ee38d882e9f073dba0b8918a903094124 (patch) | |
tree | b1950dc8b9eb0c612161ff371ef280dc083cd119 /src/gallium/drivers/svga | |
parent | 52faafa17424a3842f150019a9d76ce626019d9f (diff) |
svga: implement support for signed byte vertex attributes
There's no SVGA3D_DECLTYPE that directly corresponds to
PIPE_FORMAT_R8G8B8_SNORM. Previously, we used the swtnl fallback
path to handle this but that's slow and causes invariance issues.
Now we fetch the attribute as SVGA3D_DECLTYPE_UBYTE4N and insert
some extra VS instructions to remap the attributes from the range
[0,1] to the range [-1,1].
Fixes Sauerbraten sw fallback.
Fixes piglit normal3b3s-invariance test.
Reviewed-by: Charmaine Lee <[email protected]>
Diffstat (limited to 'src/gallium/drivers/svga')
-rw-r--r-- | src/gallium/drivers/svga/svga_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_pipe_vertex.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_state_vs.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_emit.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_insn.c | 128 |
6 files changed, 151 insertions, 14 deletions
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 79cecbf3221..55642773f89 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -201,6 +201,7 @@ struct svga_velems_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */ + unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */ }; /* Use to calculate differences between state emitted to hardware and diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index e34f3a00eeb..d679ad3bdf3 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -94,6 +94,9 @@ translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; + /* See attrib_needs_adjustment() below */ + case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N; + default: /* There are many formats without hardware support. This case * will be hit regularly, meaning we'll need swvfetch. @@ -103,6 +106,23 @@ translate_vertex_format(enum pipe_format format) } +/** + * Does the given vertex attrib format need range adjustment in the VS? + * Range adjustment scales and biases values from [0,1] to [-1,1]. + * This lets us avoid the swtnl path. 
+ */ +static boolean +attrib_needs_range_adjustment(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8_SNORM: + return TRUE; + default: + return FALSE; + } +} + + static void * svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, @@ -117,9 +137,16 @@ svga_create_vertex_elements_state(struct pipe_context *pipe, velems->count = count; memcpy(velems->velem, attribs, sizeof(*attribs) * count); + velems->adjust_attrib_range = 0x0; + /* Translate Gallium vertex format to SVGA3dDeclType */ for (i = 0; i < count; i++) { - velems->decl_type[i] = translate_vertex_format(attribs[i].src_format); + enum pipe_format f = attribs[i].src_format; + velems->decl_type[i] = translate_vertex_format(f); + + if (attrib_needs_range_adjustment(f)) { + velems->adjust_attrib_range |= (1 << i); + } } } return velems; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 2f130aec5b4..2ea25495b20 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -159,6 +159,9 @@ make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key) /* SVGA_NEW_FS */ key->fs_generic_inputs = svga->curr.fs->generic_inputs; + + /* SVGA_NEW_VELEMENT */ + key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range; } @@ -248,6 +251,7 @@ struct svga_tracked_state svga_hw_vs = (SVGA_NEW_VS | SVGA_NEW_FS | SVGA_NEW_PRESCALE | + SVGA_NEW_VELEMENT | SVGA_NEW_NEED_SWTNL), emit_hw_vs }; diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 4fe88b3b70d..cb40560242a 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -49,6 +49,7 @@ struct svga_vs_compile_key unsigned fs_generic_inputs; unsigned need_prescale:1; unsigned allow_psiz:1; + unsigned adjust_attrib_range:16; }; struct svga_fs_compile_key diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h 
b/src/gallium/drivers/svga/svga_tgsi_emit.h index 1a9731ffde8..1894296e6d7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -86,7 +86,7 @@ struct svga_shader_emitter boolean in_main_func; boolean created_common_immediate; - int common_immediate_idx; + int common_immediate_idx[2]; boolean created_loop_const; int loop_const_idx; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index d3570582404..e798b17daa7 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -859,8 +859,20 @@ create_common_immediate( struct svga_shader_emitter *emit ) if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0.0f, 0.5f, -1.0f, 1.0f )) return FALSE; + emit->common_immediate_idx[0] = idx; + idx++; + + /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ + if (emit->key.vkey.adjust_attrib_range) { + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 2.0f, 0.0f, 0.0f, 0.0f )) + return FALSE; + emit->common_immediate_idx[1] = idx; + } + else { + emit->common_immediate_idx[1] = -1; + } - emit->common_immediate_idx = idx; emit->created_common_immediate = TRUE; return TRUE; @@ -889,7 +901,7 @@ common_immediate_swizzle(float value) /** - * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5 + * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 */ static struct src_register get_immediate(struct svga_shader_emitter *emit, @@ -900,8 +912,8 @@ get_immediate(struct svga_shader_emitter *emit, unsigned sz = common_immediate_swizzle(z); unsigned sw = common_immediate_swizzle(w); assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), sx, sy, sz, sw); } @@ -913,9 
+925,9 @@ static struct src_register get_zero_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 0, 0, 0, 0); } @@ -927,9 +939,9 @@ static struct src_register get_one_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 3, 3, 3, 3); } @@ -941,13 +953,28 @@ static struct src_register get_half_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 1, 1, 1, 1); } /** + * returns {2, 2, 2, 2} immediate + */ +static struct src_register +get_two_immediate( struct svga_shader_emitter *emit ) +{ + /* Note we use the second common immediate here */ + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[1] >= 0); + return swizzle(src_register( SVGA3DREG_CONST, + emit->common_immediate_idx[1]), + 0, 0, 0, 0); +} + + +/** * returns the loop const */ static struct src_register @@ -3498,6 +3525,74 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) /** + * Emit code to invert the T component of the incoming texture coordinate. + * This is used for drawing point sprites when + * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT. 
+ */ +static boolean +emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) +{ + unsigned adjust_attrib_range = emit->key.vkey.adjust_attrib_range; + + while (adjust_attrib_range) { + /* The vertex input/attribute is supposed to be a signed value in + * the range [-1,1] but we actually fetched/converted it to the + * range [0,1]. This most likely happens when the app specifies a + * signed byte attribute but we interpreted it as unsigned bytes. + * See also svga_translate_vertex_format(). + * + * Here, we emit some extra instructions to adjust + * the attribute values from [0,1] to [-1,1]. + * + * The adjustment we implement is: + * new_attrib = attrib * 2.0; + * if (attrib >= 0.5) + * new_attrib = new_attrib - 2.0; + * This isn't exactly right (it's off by a bit or so) but close enough. + */ + const unsigned index = u_bit_scan(&adjust_attrib_range); + struct src_register tmp; + + SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); + + /* allocate a temp reg */ + tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); + emit->nr_hw_temp++; + + /* tmp = attrib * 2.0 */ + if (!submit_op2(emit, + inst_token(SVGA3DOP_MUL), + dst(tmp), + emit->input_map[index], + get_two_immediate(emit))) + return FALSE; + + /* pred = (attrib >= 0.5) */ + if (!submit_op2(emit, + inst_token_setp(SVGA3DOPCOMP_GE), + pred_reg, + emit->input_map[index], /* vert attrib */ + get_half_immediate(emit))) /* 0.5 */ + return FALSE; + + /* sub(pred) tmp, tmp, 2.0 */ + if (!submit_op3(emit, + inst_token_predicated(SVGA3DOP_SUB), + dst(tmp), + src(pred_reg), + tmp, + get_two_immediate(emit))) + return FALSE; + + /* Reassign the input_map entry to the new tmp register */ + emit->input_map[index] = tmp; + } + + return TRUE; +} + + +/** * Determine if we need to create the "common" immediate value which is * used for generating useful vector constants such as {0,0,0,0} and * {1,1,1,1}. 
@@ -3542,10 +3637,11 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) return TRUE; } } - - if (emit->unit == PIPE_SHADER_VERTEX) { + else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; + if (emit->key.vkey.adjust_attrib_range) + return TRUE; } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || @@ -3705,6 +3801,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) return FALSE; } } + else { + assert(emit->unit == PIPE_SHADER_VERTEX); + if (emit->key.vkey.adjust_attrib_range) { + if (!emit_adjusted_vertex_attribs(emit)) + return FALSE; + } + } + return TRUE; } |