From e054251ed11e25a080f64b92db9334c9b07c8c76 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 13 Aug 2015 11:00:58 -0700 Subject: svga: update driver for version 10 GPU interface This is a squash commit of roughly two years of development work. Authors include: Brian Paul Charmaine Lee Thomas Hellstrom Jakob Bornecrantz Sinclair Yeh Mingcheng Chen Kai Ninomiya MengLin Wu The driver supports OpenGL 3.3. Signed-off-by: Brian Paul --- src/gallium/drivers/svga/svga_state_fs.c | 298 +++++++++++++++++-------------- 1 file changed, 164 insertions(+), 134 deletions(-) (limited to 'src/gallium/drivers/svga/svga_state_fs.c') diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 8cdce742b3b..849319d3cf3 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -36,43 +36,12 @@ #include "svga_shader.h" #include "svga_resource_texture.h" #include "svga_tgsi.h" +#include "svga_format.h" #include "svga_hw_reg.h" -static inline int -compare_fs_keys(const struct svga_fs_compile_key *a, - const struct svga_fs_compile_key *b) -{ - unsigned keysize_a = svga_fs_key_size( a ); - unsigned keysize_b = svga_fs_key_size( b ); - - if (keysize_a != keysize_b) { - return (int)(keysize_a - keysize_b); - } - return memcmp( a, b, keysize_a ); -} - - -/** Search for a fragment shader variant */ -static struct svga_shader_variant * -search_fs_key(const struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) -{ - struct svga_shader_variant *variant = fs->base.variants; - - assert(key); - - for ( ; variant; variant = variant->next) { - if (compare_fs_keys( key, &variant->key.fkey ) == 0) - return variant; - } - - return NULL; -} - - /** * If we fail to compile a fragment shader (because it uses too many * registers, for example) we'll use a dummy/fallback shader that @@ -111,13 +80,29 @@ get_dummy_fragment_shader(void) } +static struct svga_shader_variant * +translate_fragment_program(struct svga_context *svga, + const struct svga_fragment_shader *fs, + const struct svga_compile_key *key) +{ + if (svga_have_vgpu10(svga)) { + return svga_tgsi_vgpu10_translate(svga, &fs->base, key, + PIPE_SHADER_FRAGMENT); + } + else { + return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT); + } +} + + /** * Replace the given shader's instruction with a simple constant-color * shader. We use this when normal shader translation fails. */ static struct svga_shader_variant * -get_compiled_dummy_shader(struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key) +get_compiled_dummy_shader(struct svga_context *svga, + struct svga_fragment_shader *fs, + const struct svga_compile_key *key) { const struct tgsi_token *dummy = get_dummy_fragment_shader(); struct svga_shader_variant *variant; @@ -129,7 +114,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, FREE((void *) fs->base.tokens); fs->base.tokens = dummy; - variant = svga_translate_fragment_program(fs, key); + variant = translate_fragment_program(svga, fs, key); return variant; } @@ -140,17 +125,17 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs, static enum pipe_error compile_fs(struct svga_context *svga, struct svga_fragment_shader *fs, - const struct svga_fs_compile_key *key, + const struct svga_compile_key *key, struct svga_shader_variant **out_variant) { struct svga_shader_variant *variant; enum pipe_error ret = PIPE_ERROR; - variant = svga_translate_fragment_program( fs, key ); + variant = translate_fragment_program(svga, fs, key); if (variant == NULL) { debug_printf("Failed to compile fragment shader," " using dummy shader instead.\n"); - variant = get_compiled_dummy_shader(fs, key); + variant = get_compiled_dummy_shader(svga, fs, key); if (!variant) { ret = PIPE_ERROR; goto fail; @@ -159,10 +144,11 @@ compile_fs(struct svga_context *svga, if (svga_shader_too_large(svga, variant)) { /* too big, use dummy shader */ - debug_printf("Shader too large (%lu bytes)," + debug_printf("Shader too large (%u bytes)," " using dummy shader instead.\n", - (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0])); - variant = get_compiled_dummy_shader(fs, key); + (unsigned) (variant->nr_tokens + * sizeof(variant->tokens[0]))); + variant = get_compiled_dummy_shader(svga, fs, key); if (!variant) { ret = PIPE_ERROR; goto fail; @@ -197,23 +183,45 @@ fail: static enum pipe_error make_fs_key(const struct svga_context *svga, struct svga_fragment_shader *fs, - struct svga_fs_compile_key *key) + struct svga_compile_key *key) { + const unsigned shader = PIPE_SHADER_FRAGMENT; unsigned i; - int idx = 0; memset(key, 0, sizeof *key); + memcpy(key->generic_remap_table, fs->generic_remap_table, + sizeof(fs->generic_remap_table)); + + /* SVGA_NEW_GS, SVGA_NEW_VS + */ + if (svga->curr.gs) { + key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; + } else { + key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; + } + /* Only need fragment shader fixup for twoside lighting if doing * hwtnl. Otherwise the draw module does the whole job for us. * * SVGA_NEW_SWTNL */ if (!svga->state.sw.need_swtnl) { - /* SVGA_NEW_RAST + /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ - key->light_twoside = svga->curr.rast->templ.light_twoside; - key->front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.light_twoside = svga->curr.rast->templ.light_twoside; + key->fs.front_ccw = svga->curr.rast->templ.front_ccw; + key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && + svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); + key->fs.aa_point = (svga->curr.rast->templ.point_smooth && + svga->curr.reduced_prim == PIPE_PRIM_POINTS && + (svga->curr.rast->pointsize > 1.0 || + svga->curr.vs->base.info.writes_psize)); + if (key->fs.aa_point) { + assert(svga->curr.gs != NULL); + assert(svga->curr.gs->aa_point_coord_index != -1); + key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; + } } /* The blend workaround for simulating logicop xor behaviour @@ -231,7 +239,7 @@ make_fs_key(const struct svga_context *svga, * SVGA_NEW_BLEND */ if (svga->curr.blend->need_white_fragments) { - key->white_fragments = 1; + key->fs.white_fragments = 1; } #ifdef DEBUG @@ -241,22 +249,23 @@ make_fs_key(const struct svga_context *svga, */ { static boolean warned = FALSE; - unsigned i, n = MAX2(svga->curr.num_sampler_views, - svga->curr.num_samplers); + unsigned i, n = MAX2(svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); /* Only warn once to prevent too much debug output */ if (!warned) { - if (svga->curr.num_sampler_views != svga->curr.num_samplers) { + if (svga->curr.num_sampler_views[shader] != + svga->curr.num_samplers[shader]) { debug_printf("svga: mismatched number of sampler views (%u) " "vs. samplers (%u)\n", - svga->curr.num_sampler_views, - svga->curr.num_samplers); + svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); } for (i = 0; i < n; i++) { - if ((svga->curr.sampler_views[i] == NULL) != - (svga->curr.sampler[i] == NULL)) + if ((svga->curr.sampler_views[shader][i] == NULL) != + (svga->curr.sampler[shader][i] == NULL)) debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n", - i, svga->curr.sampler_views[i], - i, svga->curr.sampler[i]); + i, svga->curr.sampler_views[shader][i], + i, svga->curr.sampler[shader][i]); } warned = TRUE; } @@ -268,68 +277,62 @@ make_fs_key(const struct svga_context *svga, * * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - for (i = 0; i < svga->curr.num_sampler_views; i++) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - assert(svga->curr.sampler_views[i]->texture); - key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target; - if (!svga->curr.sampler[i]->normalized_coords) { - key->tex[i].width_height_idx = idx++; - key->tex[i].unnormalized = TRUE; - ++key->num_unnormalized_coords; - } - - key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r; - key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g; - key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b; - key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a; - } - } - key->num_textures = svga->curr.num_sampler_views; - - idx = 0; - for (i = 0; i < svga->curr.num_samplers; ++i) { - if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) { - struct pipe_resource *tex = svga->curr.sampler_views[i]->texture; - struct svga_texture *stex = svga_texture(tex); - SVGA3dSurfaceFormat format = stex->key.format; - - if (format == SVGA3D_Z_D16 || - format == SVGA3D_Z_D24X8 || - format == SVGA3D_Z_D24S8) { - /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, - * or SVGA3D_Z_D24S8 surface, we'll automatically get - * shadow comparison. But we only get LEQUAL mode. - * Set TEX_COMPARE_NONE here so we don't emit the extra FS - * code for shadow comparison. - */ - key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; - key->tex[i].compare_func = PIPE_FUNC_NEVER; - /* These depth formats _only_ support comparison mode and - * not ordinary sampling so warn if the later is expected. - */ - if (svga->curr.sampler[i]->compare_mode != - PIPE_TEX_COMPARE_R_TO_TEXTURE) { - debug_warn_once("Unsupported shadow compare mode"); - } - /* The only supported comparison mode is LEQUAL */ - if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) { - debug_warn_once("Unsupported shadow compare function"); + svga_init_shader_key_common(svga, shader, key); + + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { + struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; + const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; + if (view) { + struct pipe_resource *tex = view->texture; + if (tex->target != PIPE_BUFFER) { + struct svga_texture *stex = svga_texture(tex); + SVGA3dSurfaceFormat format = stex->key.format; + + if (!svga_have_vgpu10(svga) && + (format == SVGA3D_Z_D16 || + format == SVGA3D_Z_D24X8 || + format == SVGA3D_Z_D24S8)) { + /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, + * or SVGA3D_Z_D24S8 surface, we'll automatically get + * shadow comparison. But we only get LEQUAL mode. + * Set TEX_COMPARE_NONE here so we don't emit the extra FS + * code for shadow comparison. + */ + key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; + key->tex[i].compare_func = PIPE_FUNC_NEVER; + /* These depth formats _only_ support comparison mode and + * not ordinary sampling so warn if the later is expected. + */ + if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { + debug_warn_once("Unsupported shadow compare mode"); + } + /* The shader translation code can emit code to + * handle ALWAYS and NEVER compare functions + */ + else if (sampler->compare_func == PIPE_FUNC_ALWAYS || + sampler->compare_func == PIPE_FUNC_NEVER) { + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; + } + else if (sampler->compare_func != PIPE_FUNC_LEQUAL) { + debug_warn_once("Unsupported shadow compare function"); + } + } + else { + /* For other texture formats, just use the compare func/mode + * as-is. Should be no-ops for color textures. For depth + * textures, we do not get automatic depth compare. We have + * to do it ourselves in the shader. And we don't get PCF. + */ + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; } - } - else { - /* For other texture formats, just use the compare func/mode - * as-is. Should be no-ops for color textures. For depth - * textures, we do not get automatic depth compare. We have - * to do it ourselves in the shader. And we don't get PCF. - */ - key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode; - key->tex[i].compare_func = svga->curr.sampler[i]->compare_func; } } } /* sprite coord gen state */ - for (i = 0; i < svga->curr.num_samplers; ++i) { + for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { key->tex[i].sprite_texgen = svga->curr.rast->templ.sprite_coord_enable & (1 << i); } @@ -337,10 +340,25 @@ make_fs_key(const struct svga_context *svga, key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); + key->fs.flatshade = svga->curr.rast->templ.flatshade; + + /* SVGA_NEW_DEPTH_STENCIL_ALPHA */ + if (svga_have_vgpu10(svga)) { + /* Alpha testing is not supported in integer-valued render targets. */ + if (svga_has_any_integer_cbufs(svga)) { + key->fs.alpha_func = SVGA3D_CMP_ALWAYS; + key->fs.alpha_ref = 0; + } + else { + key->fs.alpha_func = svga->curr.depth->alphafunc; + key->fs.alpha_ref = svga->curr.depth->alpharef; + } + } + /* SVGA_NEW_FRAME_BUFFER */ if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { /* Replicate color0 output to N colorbuffers */ - key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; + key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; } return PIPE_OK; @@ -355,18 +373,32 @@ svga_reemit_fs_bindings(struct svga_context *svga) { enum pipe_error ret; - assert(svga->rebind.fs); + assert(svga->rebind.flags.fs); assert(svga_have_gb_objects(svga)); if (!svga->state.hw_draw.fs) return PIPE_OK; - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - svga->state.hw_draw.fs->gb_shader); + if (!svga_need_to_rebind_resources(svga)) { + ret = svga->swc->resource_rebind(svga->swc, NULL, + svga->state.hw_draw.fs->gb_shader, + SVGA_RELOC_READ); + goto out; + } + + if (svga_have_vgpu10(svga)) + ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader, + svga->state.hw_draw.fs->id); + else + ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, + svga->state.hw_draw.fs->gb_shader); + + out: if (ret != PIPE_OK) return ret; - svga->rebind.fs = FALSE; + svga->rebind.flags.fs = FALSE; return PIPE_OK; } @@ -378,7 +410,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) struct svga_shader_variant *variant = NULL; enum pipe_error ret = PIPE_OK; struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; + struct svga_compile_key key; /* SVGA_NEW_BLEND * SVGA_NEW_TEXTURE_BINDING @@ -386,14 +418,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) * SVGA_NEW_NEED_SWTNL * SVGA_NEW_SAMPLER * SVGA_NEW_FRAME_BUFFER + * SVGA_NEW_DEPTH_STENCIL_ALPHA + * SVGA_NEW_VS */ - ret = make_fs_key( svga, fs, &key ); + ret = make_fs_key(svga, fs, &key); if (ret != PIPE_OK) return ret; - variant = search_fs_key( fs, &key ); + variant = svga_search_shader_key(&fs->base, &key); if (!variant) { - ret = compile_fs( svga, fs, &key, &variant ); + ret = compile_fs(svga, fs, &key, &variant); if (ret != PIPE_OK) return ret; } @@ -401,22 +435,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty) assert(variant); if (variant != svga->state.hw_draw.fs) { - if (svga_have_gb_objects(svga)) { - ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, - variant->gb_shader); - if (ret != PIPE_OK) - return ret; + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant); + if (ret != PIPE_OK) + return ret; - svga->rebind.fs = FALSE; - } - else { - ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id); - if (ret != PIPE_OK) - return ret; - } + svga->rebind.flags.fs = FALSE; svga->dirty |= SVGA_NEW_FS_VARIANT; - svga->state.hw_draw.fs = variant; + svga->state.hw_draw.fs = variant; } return PIPE_OK; @@ -426,11 +452,15 @@ struct svga_tracked_state svga_hw_fs = { "fragment shader (hwtnl)", (SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_VS | SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_NEED_SWTNL | SVGA_NEW_RAST | + SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_SAMPLER | SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_BLEND), emit_hw_fs }; -- cgit v1.2.3