diff options
248 files changed, 7171 insertions, 2012 deletions
diff --git a/.dir-locals.el b/.dir-locals.el index d95eb4803f6..4b5393198de 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -5,6 +5,7 @@ (c-file-style . "stroustrup") (fill-column . 78) (eval . (progn + (c-set-offset 'case-label '0) (c-set-offset 'innamespace '0) (c-set-offset 'inline-open '0))) ) diff --git a/appveyor.yml b/appveyor.yml index 68cc368a3a1..bf7ac752857 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ # - Select Git and fill in the Git clone URL # - Setup a Git hook as explained in # https://github.com/appveyor/webhooks#installing-git-hook -# - Check 'Settings > General > Skip branches without appveyor' +# - Check 'Settings > General > Skip branches without appveyor.yml' # - Check 'Settings > General > Rolling builds' # - Setup the global or project notifications to your liking # @@ -24,7 +24,14 @@ branches: except: - /^travis.*$/ -clone_depth: 5 +# Don't download the full Mesa history to speed up cloning. However the clone +# depth must not be too small, otherwise builds might fail when lots of patches +# are committed in succession, because the desired commit is not found on the +# truncated history. +# +# See also: +# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories +clone_depth: 100 cache: - win_flex_bison-2.4.5.zip diff --git a/configure.ac b/configure.ac index a18080d4ce5..e3d721d93aa 100644 --- a/configure.ac +++ b/configure.ac @@ -2161,7 +2161,12 @@ gallium_require_drm_loader() { fi } +dnl This is for Glamor. Skip this if OpenGL is disabled. 
require_egl_drm() { + if test "x$enable_opengl" = xno; then + return 0 + fi + case "$with_egl_platforms" in *drm*) ;; diff --git a/docs/GL3.txt b/docs/GL3.txt index f12e0ba8d29..257fc73225c 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -135,7 +135,7 @@ GL 4.2, GLSL 4.20: GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi) GL_ARB_compressed_texture_pixel_storage DONE (all drivers) - GL_ARB_shader_atomic_counters DONE (i965) + GL_ARB_shader_atomic_counters DONE (i965, nvc0) GL_ARB_texture_storage DONE (all drivers) GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) @@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30: GL_ARB_program_interface_query DONE (all drivers) GL_ARB_robust_buffer_access_behavior not started GL_ARB_shader_image_size DONE (i965) - GL_ARB_shader_storage_buffer_object DONE (i965) + GL_ARB_shader_storage_buffer_object DONE (i965, nvc0) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) @@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40: - specified transform/feedback layout in progress - input/output block locations DONE GL_ARB_multi_bind DONE (all drivers) - GL_ARB_query_buffer_object not started + GL_ARB_query_buffer_object DONE (nvc0) GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/envvars.html b/docs/envvars.html index 5bb7b1e65bb..ba83335d0b0 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -96,6 +96,7 @@ glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as "130". 
Mesa will not really implement all the features of the given language version if it's higher than what's normally reported. (for developers only) <li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a> +<li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled. </ul> diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html index 616c134a768..0d92ed41ee8 100644 --- a/docs/relnotes/11.2.0.html +++ b/docs/relnotes/11.2.0.html @@ -48,7 +48,10 @@ Note: some of the new features are only available with certain drivers. <li>GL_ARB_compute_shader on i965</li> <li>GL_ARB_copy_image on r600</li> <li>GL_ARB_indirect_parameters on nvc0</li> +<li>GL_ARB_query_buffer_object on nvc0</li> +<li>GL_ARB_shader_atomic_counters on nvc0</li> <li>GL_ARB_shader_draw_parameters on i965, nvc0</li> +<li>GL_ARB_shader_storage_buffer_object on nvc0</li> <li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li> <li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li> <li>GL_ARB_texture_buffer_range on freedreno/a4xx</li> @@ -58,6 +61,8 @@ Note: some of the new features are only available with certain drivers. 
<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li> <li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li> <li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li> +<li>GL_ATI_meminfo on r600, radeonsi</li> +<li>GL_NVX_gpu_memory_info on r600, radeonsi</li> <li>New OSMesaCreateContextAttribs() function (for creating core profile contexts)</li> </ul> diff --git a/include/D3D9/d3d9types.h b/include/D3D9/d3d9types.h index 52fbc99dad7..d74ce80bb30 100644 --- a/include/D3D9/d3d9types.h +++ b/include/D3D9/d3d9types.h @@ -227,6 +227,7 @@ typedef struct _RGNDATA { #define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157) #define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160) #define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164) +#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168) /******************************************************** * Bitmasks * diff --git a/include/d3dadapter/present.h b/include/d3dadapter/present.h index 08a97297201..162f703e320 100644 --- a/include/d3dadapter/present.h +++ b/include/d3dadapter/present.h @@ -69,6 +69,8 @@ typedef struct ID3DPresentVtbl HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow); HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride); HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This, HWND hWnd, int *width, int *height, int *depth); + /* Available since version 1.1 */ + BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This); } ID3DPresentVtbl; struct ID3DPresent @@ -96,6 +98,7 @@ struct ID3DPresent #define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c) #define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b) #define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowSize(p,a,b,c,d) +#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p) typedef struct ID3DPresentGroupVtbl { diff --git a/src/compiler/.gitignore b/src/compiler/.gitignore new file mode 100644 index 00000000000..6fb069f0bcb 
--- /dev/null +++ b/src/compiler/.gitignore @@ -0,0 +1 @@ +glsl_compiler diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index e3d297fe299..fe96cb3c879 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -220,9 +220,11 @@ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy + $(MKDIR_GEN) $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll + $(MKDIR_GEN) $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore index e80f8af6bfc..6db4e738f6e 100644 --- a/src/compiler/glsl/.gitignore +++ b/src/compiler/glsl/.gitignore @@ -1,4 +1,3 @@ -glsl_compiler glsl_lexer.cpp glsl_parser.cpp glsl_parser.h diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 98d8bc5f268..7213ad8ebec 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -291,6 +291,10 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, if (!state->is_version(120, 0)) return false; + /* ESSL does not allow implicit conversions */ + if (state->es_shader) + return false; + /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec: * * "There are no implicit array or structure conversions. 
For diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 95e86df1cdd..5512a33f114 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -661,7 +661,7 @@ private: BA1(roundEven) BA1(ceil) BA1(fract) - B2(mod) + BA2(mod) BA1(modf) BA2(min) BA2(max) @@ -1242,23 +1242,23 @@ builtin_builder::create_builtins() FD(fract) add_function("mod", - _mod(glsl_type::float_type, glsl_type::float_type), - _mod(glsl_type::vec2_type, glsl_type::float_type), - _mod(glsl_type::vec3_type, glsl_type::float_type), - _mod(glsl_type::vec4_type, glsl_type::float_type), + _mod(always_available, glsl_type::float_type, glsl_type::float_type), + _mod(always_available, glsl_type::vec2_type, glsl_type::float_type), + _mod(always_available, glsl_type::vec3_type, glsl_type::float_type), + _mod(always_available, glsl_type::vec4_type, glsl_type::float_type), - _mod(glsl_type::vec2_type, glsl_type::vec2_type), - _mod(glsl_type::vec3_type, glsl_type::vec3_type), - _mod(glsl_type::vec4_type, glsl_type::vec4_type), + _mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type), - _mod(glsl_type::double_type, glsl_type::double_type), - _mod(glsl_type::dvec2_type, glsl_type::double_type), - _mod(glsl_type::dvec3_type, glsl_type::double_type), - _mod(glsl_type::dvec4_type, glsl_type::double_type), + _mod(fp64, glsl_type::double_type, glsl_type::double_type), + _mod(fp64, glsl_type::dvec2_type, glsl_type::double_type), + _mod(fp64, glsl_type::dvec3_type, glsl_type::double_type), + _mod(fp64, glsl_type::dvec4_type, glsl_type::double_type), - _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), - _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), - _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), + _mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _mod(fp64, 
glsl_type::dvec3_type, glsl_type::dvec3_type), + _mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); FD(modf) @@ -3452,9 +3452,10 @@ UNOPA(ceil, ir_unop_ceil) UNOPA(fract, ir_unop_fract) ir_function_signature * -builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) +builtin_builder::_mod(builtin_available_predicate avail, + const glsl_type *x_type, const glsl_type *y_type) { - return binop(always_available, ir_binop_mod, x_type, x_type, y_type); + return binop(avail, ir_binop_mod, x_type, x_type, y_type); } ir_function_signature * diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index ccc04c00cea..6db74f1c634 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -328,6 +328,11 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type, this->fields[this->num_fields].sample = 0; this->fields[this->num_fields].patch = 0; this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; + this->fields[this->num_fields].image_read_only = 0; + this->fields[this->num_fields].image_write_only = 0; + this->fields[this->num_fields].image_coherent = 0; + this->fields[this->num_fields].image_volatile = 0; + this->fields[this->num_fields].image_restrict = 0; this->num_fields++; } @@ -1201,7 +1206,12 @@ builtin_variable_generator::generate_varyings() /* gl_Position and gl_PointSize are not visible from fragment shaders. 
*/ if (state->stage != MESA_SHADER_FRAGMENT) { add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position"); - add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize"); + if (!state->es_shader || + state->stage == MESA_SHADER_VERTEX || + (state->stage == MESA_SHADER_GEOMETRY && + state->OES_geometry_point_size_enable)) { + add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize"); + } } if (state->is_version(130, 0)) { diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index ef1a6575aaa..43a1aa94aff 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2386,6 +2386,13 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); if (extensions->ARB_blend_func_extended) add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); + + if (version >= 310) { + if (extensions->OES_geometry_shader) { + add_builtin_define(parser, "GL_OES_geometry_point_size", 1); + add_builtin_define(parser, "GL_OES_geometry_shader", 1); + } + } } } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index ecf0d7f76e5..d7a4b254aa2 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -600,6 +600,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { /* OES extensions go here, sorted alphabetically. 
*/ EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), + EXT(OES_geometry_point_size, false, true, OES_geometry_shader), EXT(OES_geometry_shader, false, true, OES_geometry_shader), EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), EXT(OES_texture_3D, false, true, dummy_true), @@ -1867,59 +1868,76 @@ do_common_optimization(exec_list *ir, bool linked, const struct gl_shader_compiler_options *options, bool native_integers) { + const bool debug = false; GLboolean progress = GL_FALSE; - progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; +#define OPT(PASS, ...) do { \ + if (debug) { \ + fprintf(stderr, "START GLSL optimization %s\n", #PASS); \ + const bool opt_progress = PASS(__VA_ARGS__); \ + progress = opt_progress || progress; \ + if (opt_progress) \ + _mesa_print_ir(stderr, ir, NULL); \ + fprintf(stderr, "GLSL optimization %s: %s progress\n", \ + #PASS, opt_progress ? "made" : "no"); \ + } else { \ + progress = PASS(__VA_ARGS__) || progress; \ + } \ + } while (false) + + OPT(lower_instructions, ir, SUB_TO_ADD_NEG); if (linked) { - progress = do_function_inlining(ir) || progress; - progress = do_dead_functions(ir) || progress; - progress = do_structure_splitting(ir) || progress; + OPT(do_function_inlining, ir); + OPT(do_dead_functions, ir); + OPT(do_structure_splitting, ir); } - progress = do_if_simplification(ir) || progress; - progress = opt_flatten_nested_if_blocks(ir) || progress; - progress = opt_conditional_discard(ir) || progress; - progress = do_copy_propagation(ir) || progress; - progress = do_copy_propagation_elements(ir) || progress; + OPT(do_if_simplification, ir); + OPT(opt_flatten_nested_if_blocks, ir); + OPT(opt_conditional_discard, ir); + OPT(do_copy_propagation, ir); + OPT(do_copy_propagation_elements, ir); if (options->OptimizeForAOS && !linked) - progress = opt_flip_matrices(ir) || progress; + OPT(opt_flip_matrices, ir); if (linked && options->OptimizeForAOS) { - progress = do_vectorize(ir) || progress; 
+ OPT(do_vectorize, ir); } if (linked) - progress = do_dead_code(ir, uniform_locations_assigned) || progress; + OPT(do_dead_code, ir, uniform_locations_assigned); else - progress = do_dead_code_unlinked(ir) || progress; - progress = do_dead_code_local(ir) || progress; - progress = do_tree_grafting(ir) || progress; - progress = do_constant_propagation(ir) || progress; + OPT(do_dead_code_unlinked, ir); + OPT(do_dead_code_local, ir); + OPT(do_tree_grafting, ir); + OPT(do_constant_propagation, ir); if (linked) - progress = do_constant_variable(ir) || progress; + OPT(do_constant_variable, ir); else - progress = do_constant_variable_unlinked(ir) || progress; - progress = do_constant_folding(ir) || progress; - progress = do_minmax_prune(ir) || progress; - progress = do_rebalance_tree(ir) || progress; - progress = do_algebraic(ir, native_integers, options) || progress; - progress = do_lower_jumps(ir) || progress; - progress = do_vec_index_to_swizzle(ir) || progress; - progress = lower_vector_insert(ir, false) || progress; - progress = do_swizzle_swizzle(ir) || progress; - progress = do_noop_swizzle(ir) || progress; - - progress = optimize_split_arrays(ir, linked) || progress; - progress = optimize_redundant_jumps(ir) || progress; + OPT(do_constant_variable_unlinked, ir); + OPT(do_constant_folding, ir); + OPT(do_minmax_prune, ir); + OPT(do_rebalance_tree, ir); + OPT(do_algebraic, ir, native_integers, options); + OPT(do_lower_jumps, ir); + OPT(do_vec_index_to_swizzle, ir); + OPT(lower_vector_insert, ir, false); + OPT(do_swizzle_swizzle, ir); + OPT(do_noop_swizzle, ir); + + OPT(optimize_split_arrays, ir, linked); + OPT(optimize_redundant_jumps, ir); loop_state *ls = analyze_loop_variables(ir); if (ls->loop_found) { - progress = set_loop_controls(ir, ls) || progress; - progress = unroll_loops(ir, ls, options) || progress; + OPT(set_loop_controls, ir, ls); + OPT(unroll_loops, ir, ls, options); } delete ls; +#undef OPT + return progress; } diff --git 
a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 3f88e01d599..a905b564787 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -591,6 +591,8 @@ struct _mesa_glsl_parse_state { */ bool OES_EGL_image_external_enable; bool OES_EGL_image_external_warn; + bool OES_geometry_point_size_enable; + bool OES_geometry_point_size_warn; bool OES_geometry_shader_enable; bool OES_geometry_shader_warn; bool OES_standard_derivatives_enable; diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 33b2d4c8646..7072c16cb28 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -471,10 +471,11 @@ private: */ class parcel_out_uniform_storage : public program_resource_visitor { public: - parcel_out_uniform_storage(struct string_to_uint_map *map, + parcel_out_uniform_storage(struct gl_shader_program *prog, + struct string_to_uint_map *map, struct gl_uniform_storage *uniforms, union gl_constant_value *values) - : map(map), uniforms(uniforms), values(values) + : prog(prog), map(map), uniforms(uniforms), values(values) { } @@ -492,8 +493,7 @@ public: memset(this->targets, 0, sizeof(this->targets)); } - void set_and_process(struct gl_shader_program *prog, - ir_variable *var) + void set_and_process(ir_variable *var) { current_var = var; field_counter = 0; @@ -643,6 +643,16 @@ private: uniform->opaque[shader_type].index = this->next_image; uniform->opaque[shader_type].active = true; + /* Set image access qualifiers */ + const GLenum access = + (current_var->data.image_read_only ? GL_READ_ONLY : + current_var->data.image_write_only ? GL_WRITE_ONLY : + GL_READ_WRITE); + + for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j) + prog->_LinkedShaders[shader_type]-> + ImageAccess[this->next_image + j] = access; + /* Increment the image index by 1 for non-arrays and by the * number of array elements for arrays. 
*/ @@ -844,6 +854,11 @@ private: this->values += values_for_type(type); } + /** + * Current program being processed. + */ + struct gl_shader_program *prog; + struct string_to_uint_map *map; struct gl_uniform_storage *uniforms; @@ -1007,40 +1022,6 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) } } -static void -link_set_image_access_qualifiers(struct gl_shader_program *prog, - gl_shader *sh, unsigned shader_stage, - ir_variable *var, const glsl_type *type, - char **name, size_t name_length) -{ - /* Handle arrays of arrays */ - if (type->is_array() && type->fields.array->is_array()) { - for (unsigned i = 0; i < type->length; i++) { - size_t new_length = name_length; - - /* Append the subscript to the current variable name */ - ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); - - link_set_image_access_qualifiers(prog, sh, shader_stage, var, - type->fields.array, name, - new_length); - } - } else { - unsigned id = 0; - bool found = prog->UniformHash->get(id, *name); - assert(found); - (void) found; - const gl_uniform_storage *storage = &prog->UniformStorage[id]; - const unsigned index = storage->opaque[shader_stage].index; - const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : - var->data.image_write_only ? 
GL_WRITE_ONLY : - GL_READ_WRITE); - - for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) - sh->ImageAccess[index + j] = access; - } -} - /** * Combine the hidden uniform hash map with the uniform hash map so that the * hidden uniforms will be given indicies at the end of the uniform storage @@ -1148,7 +1129,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog, union gl_constant_value *data_end = &data[num_data_slots]; #endif - parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data); + parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data); for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) @@ -1163,7 +1144,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog, var->data.mode != ir_var_shader_storage)) continue; - parcel.set_and_process(prog, var); + parcel.set_and_process(var); } prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; @@ -1301,29 +1282,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog, prog->NumHiddenUniforms = hidden_uniforms; prog->UniformStorage = uniforms; - /** - * Scan the program for image uniforms and store image unit access - * information into the gl_shader data structure. 
- */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); - - if (var && var->data.mode == ir_var_uniform && - var->type->contains_image()) { - char *name_copy = ralloc_strdup(NULL, var->name); - link_set_image_access_qualifiers(prog, sh, i, var, var->type, - &name_copy, strlen(var->name)); - ralloc_free(name_copy); - } - } - } - link_set_uniform_initializers(prog, boolean_true); return; diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 264b69ca619..a4c730ffdcf 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -967,11 +967,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) return; } - if ((consumer_var == NULL && producer_var->type->contains_integer()) || + bool needs_flat_qualifier = consumer_var == NULL && + (producer_var->type->contains_integer() || + producer_var->type->contains_double()); + + if (needs_flat_qualifier || (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) { /* Since this varying is not being consumed by the fragment shader, its * interpolation type varying cannot possibly affect rendering. - * Also, this variable is non-flat and is (or contains) an integer. + * Also, this variable is non-flat and is (or contains) an integer + * or a double. * If the consumer stage is unknown, don't modify the interpolation * type as it could affect rendering later with separate shaders. 
* diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 6657777d74c..4776ffa6acd 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -4633,8 +4633,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) &prog->NumShaderStorageBlocks, &prog->SsboInterfaceBlockIndex); - /* FINISHME: Assign fragment shader output locations. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) continue; diff --git a/src/compiler/glsl/lower_buffer_access.cpp b/src/compiler/glsl/lower_buffer_access.cpp index f8c8d140ea8..9ad811de9f1 100644 --- a/src/compiler/glsl/lower_buffer_access.cpp +++ b/src/compiler/glsl/lower_buffer_access.cpp @@ -327,6 +327,7 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx, unsigned *const_offset, bool *row_major, int *matrix_columns, + const glsl_struct_field **struct_field, unsigned packing) { *offset = new(mem_ctx) ir_constant(0u); @@ -442,8 +443,11 @@ lower_buffer_access::setup_buffer_access(void *mem_ctx, intra_struct_offset = glsl_align(intra_struct_offset, field_align); if (strcmp(struct_type->fields.structure[i].name, - deref_record->field) == 0) + deref_record->field) == 0) { + if (struct_field) + *struct_field = &struct_type->fields.structure[i]; break; + } if (packing == GLSL_INTERFACE_PACKING_STD430) intra_struct_offset += type->std430_size(field_row_major); diff --git a/src/compiler/glsl/lower_buffer_access.h b/src/compiler/glsl/lower_buffer_access.h index cc4614e9792..8772bdb76ff 100644 --- a/src/compiler/glsl/lower_buffer_access.h +++ b/src/compiler/glsl/lower_buffer_access.h @@ -57,6 +57,7 @@ public: void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, bool *row_major, int *matrix_columns, + const glsl_struct_field **struct_field, unsigned packing); }; diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp index 
533cd9202f4..12499695882 100644 --- a/src/compiler/glsl/lower_shared_reference.cpp +++ b/src/compiler/glsl/lower_shared_reference.cpp @@ -142,7 +142,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_columns, packing); + &row_major, &matrix_columns, NULL, packing); /* Now that we've calculated the offset to the start of the * dereference, walk over the type and emit loads into a temporary. @@ -210,7 +210,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_columns, packing); + &row_major, &matrix_columns, NULL, packing); deref = new(mem_ctx) ir_dereference_variable(store_var); @@ -370,7 +370,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_columns, packing); + &row_major, &matrix_columns, NULL, packing); assert(offset); assert(!row_major); diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp index a172054bac8..d6269f7cbac 100644 --- a/src/compiler/glsl/lower_ubo_reference.cpp +++ b/src/compiler/glsl/lower_ubo_reference.cpp @@ -45,7 +45,7 @@ class lower_ubo_reference_visitor : public lower_buffer_access::lower_buffer_access { public: lower_ubo_reference_visitor(struct gl_shader *shader) - : shader(shader) + : shader(shader), struct_field(NULL), variable(NULL) { } @@ -60,6 +60,7 @@ public: bool *row_major, int *matrix_columns, unsigned packing); + uint32_t ssbo_access_params(); ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, @@ -104,6 +105,8 @@ public: struct gl_shader *shader; struct gl_uniform_buffer_variable *ubo_var; + const struct glsl_struct_field *struct_field; + ir_variable *variable; 
ir_rvalue *uniform_block; bool progress; }; @@ -288,8 +291,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, *const_offset = ubo_var->Offset; + this->struct_field = NULL; setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, - matrix_columns, packing); + matrix_columns, &this->struct_field, packing); } void @@ -317,6 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) this->buffer_access_type = var->is_in_shader_storage_block() ? ssbo_load_access : ubo_load_access; + this->variable = var; /* Compute the offset to the start if the dereference as well as other * information we need to configure the write @@ -370,6 +375,24 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state) return state->ARB_shader_storage_buffer_object_enable; } +uint32_t +lower_ubo_reference_visitor::ssbo_access_params() +{ + assert(variable); + + if (variable->is_interface_instance()) { + assert(struct_field); + + return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) | + (struct_field->image_restrict ? ACCESS_RESTRICT : 0) | + (struct_field->image_volatile ? ACCESS_VOLATILE : 0)); + } else { + return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) | + (variable->data.image_restrict ? ACCESS_RESTRICT : 0) | + (variable->data.image_volatile ? 
ACCESS_VOLATILE : 0)); + } +} + ir_call * lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, ir_rvalue *deref, @@ -394,6 +417,10 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); sig_params.push_tail(writemask_ref); + ir_variable *access_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); + sig_params.push_tail(access_ref); + ir_function_signature *sig = new(mem_ctx) ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); assert(sig); @@ -408,6 +435,7 @@ lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, call_params.push_tail(offset->clone(mem_ctx, NULL)); call_params.push_tail(deref->clone(mem_ctx, NULL)); call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); return new(mem_ctx) ir_call(sig, NULL, &call_params); } @@ -426,6 +454,10 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); sig_params.push_tail(offset_ref); + ir_variable *access_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); + sig_params.push_tail(access_ref); + ir_function_signature *sig = new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); assert(sig); @@ -444,6 +476,7 @@ lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, exec_list call_params; call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); return new(mem_ctx) ir_call(sig, deref_result, &call_params); } @@ -499,6 +532,7 @@ lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, unsigned packing = var->get_interface_type()->interface_packing; this->buffer_access_type = ssbo_store_access; + this->variable = var; /* Compute the offset to the start if the 
dereference as well as other * information we need to configure the write @@ -678,6 +712,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu int unsized_array_stride = calculate_unsized_array_stride(deref, packing); this->buffer_access_type = ssbo_unsized_array_length_access; + this->variable = var; /* Compute the offset to the start if the dereference as well as other * information we need to calculate the length. @@ -910,6 +945,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) unsigned packing = var->get_interface_type()->interface_packing; this->buffer_access_type = ssbo_atomic_access; + this->variable = var; setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, diff --git a/src/compiler/glsl/opt_tree_grafting.cpp b/src/compiler/glsl/opt_tree_grafting.cpp index 83effb7424c..812f996fb81 100644 --- a/src/compiler/glsl/opt_tree_grafting.cpp +++ b/src/compiler/glsl/opt_tree_grafting.cpp @@ -361,11 +361,12 @@ tree_grafting_basic_block(ir_instruction *bb_first, if (!lhs_var) continue; - if (lhs_var->data.mode == ir_var_function_out || - lhs_var->data.mode == ir_var_function_inout || - lhs_var->data.mode == ir_var_shader_out || - lhs_var->data.mode == ir_var_shader_storage) - continue; + if (lhs_var->data.mode == ir_var_function_out || + lhs_var->data.mode == ir_var_function_inout || + lhs_var->data.mode == ir_var_shader_out || + lhs_var->data.mode == ir_var_shader_storage || + lhs_var->data.mode == ir_var_shader_shared) + continue; ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 5920c2e2611..d2eaec173b3 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -164,6 +164,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, this->fields.structure[i].sample = fields[i].sample; this->fields.structure[i].matrix_layout = fields[i].matrix_layout; 
this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].image_read_only = fields[i].image_read_only; + this->fields.structure[i].image_write_only = fields[i].image_write_only; + this->fields.structure[i].image_coherent = fields[i].image_coherent; + this->fields.structure[i].image_volatile = fields[i].image_volatile; + this->fields.structure[i].image_restrict = fields[i].image_restrict; this->fields.structure[i].precision = fields[i].precision; } @@ -1330,6 +1335,13 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired, if (this == desired) return true; + /* ESSL does not allow implicit conversions. If there is no state, we're + * doing intra-stage function linking where these checks have already been + * done. + */ + if (state && state->es_shader) + return false; + /* There is no conversion among matrix types. */ if (this->matrix_columns > 1 || desired->matrix_columns > 1) return false; diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index a9b5281e774..5965cb2eedb 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -885,7 +885,8 @@ struct glsl_struct_field { glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), - precision(GLSL_PRECISION_NONE) + precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0), + image_coherent(0), image_volatile(0), image_restrict(0) { /* empty */ } diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 37cb0221e0b..312d2f99a1c 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -139,7 +139,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) b->shader->options->lower_pack_unorm_2x16); nir_ssa_def *word = - nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + 
nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); nir_ssa_def *val = nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), nir_channel(b, word, 0)); @@ -154,7 +154,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) b->shader->options->lower_pack_unorm_4x8); nir_ssa_def *byte = - nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); nir_ssa_def *val = nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 0eff89783dd..60ade4a80ae 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -238,15 +238,15 @@ unpack_2x16("unorm") unpack_4x8("unorm") unpack_2x16("half") -unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ -dst = (src0.x & 0xffff) | (src0.y >> 16); +unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """ +dst.x = (src0.x & 0xffff) | (src0.y >> 16); """) -unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ -dst = (src0.x << 0) | - (src0.y << 8) | - (src0.z << 16) | - (src0.w << 24); +unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """ +dst.x = (src0.x << 0) | + (src0.y << 8) | + (src0.z << 16) | + (src0.w << 24); """) # Lowered floating point unpacking operations. 
@@ -562,12 +562,12 @@ dst.y = src1.x; """) # Byte extraction -binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") -binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))") +binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") +binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))") # Word extraction -binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") -binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))") +binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") +binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))") def triop(name, ty, const_expr): diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f4bfd3a921a..d4f4a3d903c 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -248,19 +248,19 @@ optimizations = [ ('ubfe', 'value', 'offset', 'bits')), 'options->lower_bitfield_extract'), - (('extract_ibyte', a, b), - ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8), + (('extract_i8', a, b), + ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24), 'options->lower_extract_byte'), - (('extract_ubyte', a, b), + (('extract_u8', a, b), ('iand', ('ushr', a, ('imul', b, 8)), 0xff), 'options->lower_extract_byte'), - (('extract_iword', a, b), + (('extract_i16', a, b), ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16), 'options->lower_extract_word'), - (('extract_uword', a, b), + (('extract_u16', a, b), ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), 'options->lower_extract_word'), @@ -285,30 +285,30 @@ optimizations = [ 'options->lower_pack_snorm_4x8'), (('unpack_unorm_2x16', 'v'), - ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0), - ('extract_uword', 'v', 1), 0, 0)), + ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0), + ('extract_u16', 'v', 1))), 65535.0), 'options->lower_unpack_unorm_2x16'), (('unpack_unorm_4x8', 'v'), - ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0), - 
('extract_ubyte', 'v', 1), - ('extract_ubyte', 'v', 2), - ('extract_ubyte', 'v', 3))), + ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0), + ('extract_u8', 'v', 1), + ('extract_u8', 'v', 2), + ('extract_u8', 'v', 3))), 255.0), 'options->lower_unpack_unorm_4x8'), (('unpack_snorm_2x16', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0), - ('extract_iword', 'v', 1), 0, 0)), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0), + ('extract_i16', 'v', 1))), 32767.0))), 'options->lower_unpack_snorm_2x16'), (('unpack_snorm_4x8', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0), - ('extract_ibyte', 'v', 1), - ('extract_ibyte', 'v', 2), - ('extract_ibyte', 'v', 3))), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0), + ('extract_i8', 'v', 1), + ('extract_i8', 'v', 2), + ('extract_i8', 'v', 3))), 127.0))), 'options->lower_unpack_snorm_4x8'), ] diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index e3f46e3d739..d44aabf8f3c 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -544,6 +544,16 @@ enum gl_frag_depth_layout FRAG_DEPTH_LAYOUT_UNCHANGED }; +/** + * \brief Buffer access qualifiers + */ +enum gl_buffer_access_qualifier +{ + ACCESS_COHERENT = 1, + ACCESS_RESTRICT = 2, + ACCESS_VOLATILE = 4, +}; + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index 749be7dfeb9..2b469b65ee4 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -85,7 +85,7 @@ endif # virgl ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),) -SUBDIRS += winsys/virgl/drm drivers/virgl +SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl endif # vmwgfx diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 7854142f736..7cf0deece81 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm, * * Convert float32 to half floats, preserving Infs and NaNs, * with rounding towards zero (trunc). + * XXX: For GL, would prefer rounding towards nearest(-even). */ LLVMValueRef lp_build_float_to_half(struct gallivm_state *gallivm, @@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm, struct lp_type i16_type = lp_type_int_vec(16, 16 * length); LLVMValueRef result; + /* + * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits + * directly, without any (x86 or generic) intrinsics. + * Albeit the rounding mode cannot be specified (and is undefined, + * though in practice on x86 seems to do nearest-even but it may + * be dependent on instruction set support), so is essentially + * useless. + */ + if (util_cpu_caps.has_f16c && (length == 4 || length == 8)) { struct lp_type i168_type = lp_type_int_vec(16, 16 * 8); @@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm, LLVMValueRef index = LLVMConstInt(i32t, i, 0); LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, ""); #if 0 - /* XXX: not really supported by backends */ + /* + * XXX: not really supported by backends. + * Even if they would now, rounding mode cannot be specified and + * is undefined. + */ LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32); #else LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 0b0f7f0147c..d80c997ad84 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm, /** + * Un-interleave vector. + * This will return a vector consisting of every second element + * (depending on lo_hi, beginning at 0 or 1). 
+ * The returned vector size (elems and width) will only be half + * that of the source vector. + */ +LLVMValueRef +lp_build_uninterleave1(struct gallivm_state *gallivm, + unsigned num_elems, + LLVMValueRef a, + unsigned lo_hi) +{ + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + assert(num_elems <= LP_MAX_VECTOR_LENGTH); + + for (i = 0; i < num_elems / 2; ++i) + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi); + + shuffle = LLVMConstVector(elems, num_elems / 2); + + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, ""); +} + + +/** * Interleave vector elements. * * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 7cede35bbde..367fba1fd21 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm, LLVMValueRef b, unsigned lo_hi); +LLVMValueRef +lp_build_uninterleave1(struct gallivm_state *gallivm, + unsigned num_elems, + LLVMValueRef a, + unsigned lo_hi); void lp_build_unpack2(struct gallivm_state *gallivm, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index c88dfbf974a..1cbe47ca91f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm( /* Ignore deprecated instructions */ switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_UP2H: case TGSI_OPCODE_UP2US: case TGSI_OPCODE_UP4B: case TGSI_OPCODE_UP4UB: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 6f75bec5005..43af6b4ea0d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -45,8 +45,10 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include 
"lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_logic.h" +#include "lp_bld_pack.h" #include "tgsi/tgsi_exec.h" @@ -530,6 +532,77 @@ static struct lp_build_tgsi_action log_action = { log_emit /* emit */ }; +/* TGSI_OPCODE_PK2H */ + +static void +pk2h_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); +} + +static void +pk2h_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_type f16i_t; + LLVMValueRef lo, hi, res; + + f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32); + lo = lp_build_float_to_half(gallivm, emit_data->args[0]); + hi = lp_build_float_to_half(gallivm, emit_data->args[1]); + /* maybe some interleave doubling vector width would be useful... 
*/ + lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2); + hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2); + res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0); + + emit_data->output[emit_data->chan] = res; +} + +static struct lp_build_tgsi_action pk2h_action = { + pk2h_fetch_args, /* fetch_args */ + pk2h_emit /* emit */ +}; + +/* TGSI_OPCODE_UP2H */ + +static void +up2h_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMValueRef lo, hi, res[2], arg; + unsigned nr = bld_base->base.type.length; + LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2); + + arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, ""); + lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0); + hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1); + res[0] = lp_build_half_to_float(gallivm, lo); + res[1] = lp_build_half_to_float(gallivm, hi); + + emit_data->output[0] = emit_data->output[2] = res[0]; + emit_data->output[1] = emit_data->output[3] = res[1]; +} + +static struct lp_build_tgsi_action up2h_action = { + scalar_unary_fetch_args, /* fetch_args */ + up2h_emit /* emit */ +}; + /* TGSI_OPCODE_LRP */ static void @@ -1032,10 +1105,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; + bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action; bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action; bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; + bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action; 
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action; bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args; diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h index 332b1cba984..90820d3fe91 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -226,14 +226,9 @@ pipe_freedreno_create_screen(int fd) struct pipe_screen * pipe_virgl_create_screen(int fd) { - struct virgl_winsys *vws; struct pipe_screen *screen; - vws = virgl_drm_winsys_create(fd); - if (!vws) - return NULL; - - screen = virgl_create_screen(vws); + screen = virgl_drm_screen_create(fd); return screen ? debug_screen_wrap(screen) : NULL; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f67c16200a9..d898fd66f48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -58,6 +58,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_half.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -3058,6 +3059,45 @@ exec_dp2(struct tgsi_exec_machine *mach, } static void +exec_pk2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg[2], dst; + + fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst.u[chan] = util_float_to_half(arg[0].f[chan]) | + (util_float_to_half(arg[1].f[chan]) << 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); + } + } +} + +static void +exec_up2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + 
unsigned chan; + union tgsi_exec_channel arg, dst[2]; + + fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); + dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -4339,7 +4379,7 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + exec_pk2h(mach, inst); break; case TGSI_OPCODE_PK2US: @@ -4425,7 +4465,7 @@ exec_instruction( break; case TGSI_OPCODE_UP2H: - assert (0); + exec_up2h(mach, inst); break; case TGSI_OPCODE_UP2US: diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index b270dd73b67..70fc4604537 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE }, - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */ + { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR }, { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ }, { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */ { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, @@ -426,6 +426,7 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_SAMPLE_I: case TGSI_OPCODE_SAMPLE_I_MS: case TGSI_OPCODE_UMUL_HI: + case TGSI_OPCODE_UP2H: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 7a02e27e01e..687fb54830d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -377,6 +377,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->reads_position = TRUE; else if (semName == TGSI_SEMANTIC_FACE) info->uses_frontface = TRUE; + else if (semName == TGSI_SEMANTIC_SAMPLEMASK) + info->reads_samplemask = TRUE; } else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte) semName; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index b0b423ab528..0541255764c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -81,6 +81,7 @@ struct tgsi_shader_info ubyte colors_written; boolean reads_position; /**< does fragment shader read position? */ boolean reads_z; /**< does fragment shader read depth? */ + boolean reads_samplemask; /**< does fragment shader read sample mask? */ boolean writes_z; /**< does fragment shader write Z value? */ boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_samplemask; /**< does fragment shader write sample mask? 
*/ diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h index 66cf989a830..00f231dc683 100644 --- a/src/gallium/auxiliary/util/u_box.h +++ b/src/gallium/auxiliary/util/u_box.h @@ -195,4 +195,16 @@ u_box_minify_2d(struct pipe_box *dst, dst->height = MAX2(src->height >> l, 1); } +static inline void +u_box_minify_3d(struct pipe_box *dst, + const struct pipe_box *src, unsigned l) +{ + dst->x = src->x >> l; + dst->y = src->y >> l; + dst->z = src->z >> l; + dst->width = MAX2(src->width >> l, 1); + dst->height = MAX2(src->height >> l, 1); + dst->depth = MAX2(src->depth >> l, 1); +} + #endif diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index c719d3a77f0..a84de4fef7b 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -52,7 +52,7 @@ #include <machine/cpu.h> #endif -#if defined(PIPE_OS_FREEBSD) +#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY) #include <sys/types.h> #include <sys/sysctl.h> #endif diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py index 929017a4486..d83603faa78 100755 --- a/src/gallium/auxiliary/util/u_format_parse.py +++ b/src/gallium/auxiliary/util/u_format_parse.py @@ -313,7 +313,7 @@ def _parse_channels(fields, layout, colorspace, swizzles): return channels def parse(filename): - '''Parse the format descrition in CSV format in terms of the + '''Parse the format description in CSV format in terms of the Channel and Format classes above.''' stream = open(filename) diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h index d28fae3c77d..966d213bdd5 100644 --- a/src/gallium/auxiliary/util/u_half.h +++ b/src/gallium/auxiliary/util/u_half.h @@ -74,7 +74,11 @@ util_float_to_half(float f) f32.ui &= round_mask; f32.f *= magic.f; f32.ui -= round_mask; - + /* + * XXX: The magic mul relies on denorms being available, otherwise + * all 
f16 denorms get flushed to zero - hence when this is used + * for tgsi_exec in softpipe we won't get f16 denorms. + */ /* * Clamp to max finite value if overflowed. * OpenGL has completely undefined rounding behavior for float to @@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16) /* Adjust */ f32.f *= magic.f; + /* XXX: The magic mul relies on denorms being available */ /* Inf / NaN */ if (f32.f >= infnan.f) diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c index 1c6cdd4f2c9..5241471f516 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.c +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -49,6 +49,13 @@ enum VS_OUTPUT VS_O_VTEX = 0 }; +const int vl_zscan_normal_16[] = +{ + /* Zig-Zag scan pattern */ + 0, 1, 4, 8, 5, 2, 3, 6, + 9,12,13,10, 7,11,14,15 +}; + const int vl_zscan_linear[] = { /* Linear scan pattern */ diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h index eacee2db64f..268cf0a6e32 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.h +++ b/src/gallium/auxiliary/vl/vl_zscan.h @@ -64,6 +64,7 @@ struct vl_zscan_buffer struct pipe_surface *dst; }; +extern const int vl_zscan_normal_16[]; extern const int vl_zscan_linear[]; extern const int vl_zscan_normal[]; extern const int vl_zscan_alternate[]; diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 4c03e00008c..904e1ff04e7 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -325,6 +325,11 @@ returned). Otherwise, if the ``wait`` parameter is FALSE, the call will not block and the return value will be TRUE if the query has completed or FALSE otherwise. +``get_query_result_resource`` is used to store the result of a query into +a resource without synchronizing with the CPU. This write will optionally +wait for the query to complete, and will optionally write whether the value +is available instead of the value itself. 
+ The interface currently includes the following types of queries: ``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index b461810644a..3324bcca6f4 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -138,6 +138,10 @@ The integer capabilities: * ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required alignment for pipe_sampler_view::u.buf.first_element, in bytes. If a driver does not support first/last_element, it should return 0. +* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only + supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views. + When this is the case it should be assumed that the swizzle parameters + in the sampler view have no effect. * ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation. If true, the hardware cannot replace arbitrary shader inputs with sprite coordinates and hence the inputs that are desired to be replaceable must @@ -164,7 +168,7 @@ The integer capabilities: view it is intended to be used with, or herein undefined results may occur for permutational swizzles. * ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with - a buffer sampler view, in bytes. + a buffer sampler view, in texels. * ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors since they are linked) a driver can support. Returning 0 is equivalent to returning 1 because every driver has to support at least a single @@ -306,6 +310,15 @@ The integer capabilities: * ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap is supported. * ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported. 
+* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether + pipe_context::create_surface supports reinterpreting a texture as a surface + of a format with different block width/height (but same block size in bits). + For example, a compressed texture image can be interpreted as a + non-compressed surface whose texels are the same number of bits as the + compressed blocks, and vice versa. The width and height of the surface is + adjusted appropriately. +* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Driver supports + context::get_query_result_resource callback. .. _pipe_capf: diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 7810a3eb915..489cbb0bc2f 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2372,6 +2372,23 @@ programs. the program. Results are unspecified if any of the remaining threads terminates or never reaches an executed BARRIER instruction. +.. opcode:: MEMBAR - Memory barrier + + ``MEMBAR type`` + + This opcode waits for the completion of all memory accesses based on + the type passed in. The type is an immediate bitfield with the following + meaning: + + Bit 0: Shader storage buffers + Bit 1: Atomic buffers + Bit 2: Images + Bit 3: Shared memory + Bit 4: Thread group + + These may be passed in in any combination. An implementation is free to not + distinguish between these as it sees fit. However these map to all the + possibilities made available by GLSL. .. 
_atomopcodes: diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index c5ea86f9368..c54bb1091f7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -152,6 +152,9 @@ fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len) struct fd_ringbuffer *ring = ctx->ring; const uint32_t *buf = (const void *)string; + /* max packet size is 0x3fff dwords: */ + len = MIN2(len, 0x3fff * 4); + OUT_PKT3(ring, CP_NOP, align(len, 4) / 4); while (len >= 4) { OUT_RING(ring, *buf); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 640f50f5dcb..27f4d267438 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -165,6 +165,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_COMPUTE: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_SM3: @@ -183,6 +184,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CLIP_HALFZ: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return 0; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: if (is_a3xx(screen)) return 16; if (is_a4xx(screen)) return 32; @@ -248,6 +251,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: return 0; case PIPE_CAP_MAX_VIEWPORTS: @@ -296,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Queries. 
*/ case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_OCCLUSION_QUERY: return is_a3xx(screen) || is_a4xx(screen); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 1ea2dd9cbf7..6eb6a2d52ef 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -556,6 +556,10 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp) } } +/* NOTE: this creates the "TGSI" style fragface (ie. input slot + * VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face + * we can just use the value from hw directly (since it is boolean) + */ static struct ir3_instruction * create_frag_face(struct ir3_compile *ctx, unsigned comp) { @@ -1224,7 +1228,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_vertex_id_zero_base: if (!ctx->vertex_id) { - ctx->vertex_id = create_input(ctx->block, 0); + ctx->vertex_id = create_input(b, 0); add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, ctx->vertex_id); } @@ -1232,7 +1236,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_instance_id: if (!ctx->instance_id) { - ctx->instance_id = create_input(ctx->block, 0); + ctx->instance_id = create_input(b, 0); add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, ctx->instance_id); } @@ -1244,6 +1248,14 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); } break; + case nir_intrinsic_load_front_face: + if (!ctx->frag_face) { + ctx->so->frag_face = true; + ctx->frag_face = create_input(b, 0); + ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; + } + dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0); + break; case nir_intrinsic_discard_if: case nir_intrinsic_discard: { struct ir3_instruction *cond, 
*kill; @@ -1349,6 +1361,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) struct ir3_block *b = ctx->block; struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy; + struct ir3_instruction *const_off[4]; bool has_bias = false, has_lod = false, has_proj = false, has_off = false; unsigned i, coords, flags; unsigned nsrc0 = 0, nsrc1 = 0; @@ -1392,7 +1405,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) ddy = get_src(ctx, &tex->src[i].src); break; default: - compile_error(ctx, "Unhandled NIR tex serc type: %d\n", + compile_error(ctx, "Unhandled NIR tex src type: %d\n", tex->src[i].src_type); return; } @@ -1417,6 +1430,21 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) tex_info(tex, &flags, &coords); + if (!has_off) { + /* could still have a constant offset: */ + if (tex->const_offset[0] || tex->const_offset[1] || + tex->const_offset[2] || tex->const_offset[3]) { + off = const_off; + + off[0] = create_immed(b, tex->const_offset[0]); + off[1] = create_immed(b, tex->const_offset[1]); + off[2] = create_immed(b, tex->const_offset[2]); + off[3] = create_immed(b, tex->const_offset[3]); + + has_off = true; + } + } + /* scale up integer coords for TXF based on the LOD */ if (ctx->unminify_coords && (opc == OPC_ISAML)) { assert(has_lod); @@ -2053,6 +2081,9 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: break; + case VARYING_SLOT_CLIP_VERTEX: + /* handled entirely in nir_lower_clip: */ + return; default: if (slot >= VARYING_SLOT_VAR0) break; @@ -2135,11 +2166,17 @@ emit_instructions(struct ir3_compile *ctx) setup_output(ctx, var); } - /* Setup variables (which should only be arrays): */ + /* Setup global variables (which should only be arrays): */ nir_foreach_variable(var, &ctx->s->globals) { declare_var(ctx, var); } + /* Setup local variables (which should only be arrays): */ + /* NOTE: need to do 
something more clever when we support >1 fxn */ + nir_foreach_variable(var, &fxn->locals) { + declare_var(ctx, var); + } + /* And emit the body: */ ctx->impl = fxn; emit_function(ctx, fxn); diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 6b0ab587001..8d010f9dc8c 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -262,6 +262,9 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 5171cca9ea6..44d7c11af43 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return true; + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return 0; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 1; case PIPE_CAP_TGSI_TEXCOORD: @@ -486,6 +488,9 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index db45cbbb057..34008e1c01e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -308,17 +308,4 @@ void lp_debug_draw_bins_by_coverage( struct lp_scene *scene ); -#ifdef PIPE_ARCH_SSE -#include <emmintrin.h> -#include "util/u_sse.h" - 
-static inline __m128i -lp_plane_to_m128i(const struct lp_rast_plane *plane) -{ - return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx, - (int32_t)plane->dcdy, (int32_t)plane->eo); -} - -#endif - #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 0ae6ec28d35..f4a2f0268f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -239,7 +239,7 @@ sign_bits4(const __m128i *cstep, int cdiff) void lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = GET_PLANES(tri); @@ -250,26 +250,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; unsigned nr = 0; - __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ - __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ + /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */ + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */ + __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); __m128i zero = _mm_setzero_si128(); - __m128i c; - __m128i dcdx; - __m128i dcdy; - __m128i rej4; - - __m128i dcdx2; - __m128i dcdx3; + __m128i c, dcdx, dcdy, rej4; + __m128i dcdx_neg_mask, dcdy_neg_mask; + __m128i dcdx2, dcdx3; __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; - + transpose4_epi32(&p0, &p1, &p2, &zero, - &c, &dcdx, &dcdy, &rej4); + &c, &unused, &dcdx, &dcdy); + + /* recalc eo - easier than trying to load as scalars / shuffle... 
*/ + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); /* Adjust dcdx; */ @@ -349,32 +352,29 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, void lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task, - const union lp_rast_cmd_arg arg) + const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = GET_PLANES(tri); unsigned x = (arg.triangle.plane_mask & 0xff) + task->x; unsigned y = (arg.triangle.plane_mask >> 8) + task->y; - __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ - __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ + /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */ + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */ + __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); __m128i zero = _mm_setzero_si128(); - __m128i c; - __m128i dcdx; - __m128i dcdy; + __m128i c, dcdx, dcdy; + __m128i dcdx2, dcdx3; - __m128i dcdx2; - __m128i dcdx3; - __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; transpose4_epi32(&p0, &p1, &p2, &zero, - &c, &dcdx, &dcdy, &unused); + &c, &unused, &dcdx, &dcdy); /* Adjust dcdx; */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 879a2e7d2f0..2c66bf46332 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -311,6 +311,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case 
PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 03bb8ce2b6f..5ab297d7e1a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -168,6 +168,21 @@ struct lp_setup_context const float (*v2)[4]); }; +static inline void +scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox, + struct u_rect *scissor) +{ + /* left */ + scis_planes[0] = (bbox->x0 < scissor->x0); + /* right */ + scis_planes[1] = (bbox->x1 > scissor->x1); + /* top */ + scis_planes[2] = (bbox->y0 < scissor->y0); + /* bottom */ + scis_planes[3] = (bbox->y1 > scissor->y1); +} + + void lp_setup_choose_triangle( struct lp_setup_context *setup ); void lp_setup_choose_line( struct lp_setup_context *setup ); void lp_setup_choose_point( struct lp_setup_context *setup ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index f425825fc2a..af4e7900d3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -336,13 +336,6 @@ try_setup_line( struct lp_setup_context *setup, layer = MIN2(layer, scene->fb_max_layer); } - if (setup->scissor_test) { - nr_planes = 8; - } - else { - nr_planes = 4; - } - dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; area = (dx * dx + dy * dy); @@ -591,6 +584,18 @@ try_setup_line( struct lp_setup_context *setup, bbox.x0 = MAX2(bbox.x0, 0); bbox.y0 = MAX2(bbox.y0, 0); + nr_planes = 4; + /* + * Determine how many scissor planes we need, that is drop scissor + * edges if the bounding box of the tri is fully inside that edge. 
+ */ + if (setup->scissor_test) { + /* why not just use draw_regions */ + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]); + nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; + } + line = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, @@ -708,30 +713,46 @@ try_setup_line( struct lp_setup_context *setup, * Note that otherwise, the scissor planes only vary in 'C' value, * and even then only on state-changes. Could alternatively store * these planes elsewhere. + * (Or only store the c value together with a bit indicating which + * scissor edge this is, so rasterization would treat them differently + * (easier to evaluate) to ordinary planes.) */ - if (nr_planes == 8) { - const struct u_rect *scissor = - &setup->scissors[viewport_index]; - - plane[4].dcdx = -1 << 8; - plane[4].dcdy = 0; - plane[4].c = (1-scissor->x0) << 8; - plane[4].eo = 1 << 8; - - plane[5].dcdx = 1 << 8; - plane[5].dcdy = 0; - plane[5].c = (scissor->x1+1) << 8; - plane[5].eo = 0; - - plane[6].dcdx = 0; - plane[6].dcdy = 1 << 8; - plane[6].c = (1-scissor->y0) << 8; - plane[6].eo = 1 << 8; - - plane[7].dcdx = 0; - plane[7].dcdy = -1 << 8; - plane[7].c = (scissor->y1+1) << 8; - plane[7].eo = 0; + if (nr_planes > 4) { + /* why not just use draw_regions */ + struct u_rect *scissor = &setup->scissors[viewport_index]; + struct lp_rast_plane *plane_s = &plane[4]; + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, scissor); + + if (s_planes[0]) { + plane_s->dcdx = -1 << 8; + plane_s->dcdy = 0; + plane_s->c = (1-scissor->x0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[1]) { + plane_s->dcdx = 1 << 8; + plane_s->dcdy = 0; + plane_s->c = (scissor->x1+1) << 8; + plane_s->eo = 0 << 8; + plane_s++; + } + if (s_planes[2]) { + plane_s->dcdx = 0; + plane_s->dcdy = 1 << 8; + plane_s->c = (1-scissor->y0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[3]) { + plane_s->dcdx = 0; + plane_s->dcdy = -1 
<< 8; + plane_s->c = (scissor->y1+1) << 8; + plane_s->eo = 0; + plane_s++; + } + assert(plane_s == &plane[nr_planes]); } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 907129dbd1b..cdb3d015dec 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -302,13 +302,6 @@ do_triangle_ccw(struct lp_setup_context *setup, layer = MIN2(layer, scene->fb_max_layer); } - if (setup->scissor_test) { - nr_planes = 7; - } - else { - nr_planes = 3; - } - /* Bounding rectangle (in pixels) */ { /* Yes this is necessary to accurately calculate bounding boxes @@ -347,6 +340,18 @@ do_triangle_ccw(struct lp_setup_context *setup, bbox.x0 = MAX2(bbox.x0, 0); bbox.y0 = MAX2(bbox.y0, 0); + nr_planes = 3; + /* + * Determine how many scissor planes we need, that is drop scissor + * edges if the bounding box of the tri is fully inside that edge. 
+ */ + if (setup->scissor_test) { + /* why not just use draw_regions */ + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]); + nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; + } + tri = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, @@ -367,13 +372,11 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Setup parameter interpolants: */ - setup->setup.variant->jit_function( v0, - v1, - v2, - frontfacing, - GET_A0(&tri->inputs), - GET_DADX(&tri->inputs), - GET_DADY(&tri->inputs) ); + setup->setup.variant->jit_function(v0, v1, v2, + frontfacing, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs)); tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; @@ -383,9 +386,9 @@ do_triangle_ccw(struct lp_setup_context *setup, if (0) lp_dump_setup_coef(&setup->setup.variant->key, - (const float (*)[4])GET_A0(&tri->inputs), - (const float (*)[4])GET_DADX(&tri->inputs), - (const float (*)[4])GET_DADY(&tri->inputs)); + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); plane = GET_PLANES(tri); @@ -672,29 +675,46 @@ do_triangle_ccw(struct lp_setup_context *setup, * Note that otherwise, the scissor planes only vary in 'C' value, * and even then only on state-changes. Could alternatively store * these planes elsewhere. + * (Or only store the c value together with a bit indicating which + * scissor edge this is, so rasterization would treat them differently + * (easier to evaluate) to ordinary planes.) 
*/ - if (nr_planes == 7) { - const struct u_rect *scissor = &setup->scissors[viewport_index]; - - plane[3].dcdx = -1 << 8; - plane[3].dcdy = 0; - plane[3].c = (1-scissor->x0) << 8; - plane[3].eo = 1 << 8; - - plane[4].dcdx = 1 << 8; - plane[4].dcdy = 0; - plane[4].c = (scissor->x1+1) << 8; - plane[4].eo = 0; - - plane[5].dcdx = 0; - plane[5].dcdy = 1 << 8; - plane[5].c = (1-scissor->y0) << 8; - plane[5].eo = 1 << 8; - - plane[6].dcdx = 0; - plane[6].dcdy = -1 << 8; - plane[6].c = (scissor->y1+1) << 8; - plane[6].eo = 0; + if (nr_planes > 3) { + /* why not just use draw_regions */ + struct u_rect *scissor = &setup->scissors[viewport_index]; + struct lp_rast_plane *plane_s = &plane[3]; + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, scissor); + + if (s_planes[0]) { + plane_s->dcdx = -1 << 8; + plane_s->dcdy = 0; + plane_s->c = (1-scissor->x0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[1]) { + plane_s->dcdx = 1 << 8; + plane_s->dcdy = 0; + plane_s->c = (scissor->x1+1) << 8; + plane_s->eo = 0 << 8; + plane_s++; + } + if (s_planes[2]) { + plane_s->dcdx = 0; + plane_s->dcdy = 1 << 8; + plane_s->c = (1-scissor->y0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[3]) { + plane_s->dcdx = 0; + plane_s->dcdy = -1 << 8; + plane_s->c = (scissor->y1+1) << 8; + plane_s->eo = 0; + plane_s++; + } + assert(plane_s == &plane[nr_planes]); } return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index); @@ -984,17 +1004,16 @@ calc_fixed_position(struct lp_setup_context *setup, * Both should be acceptable, I think. 
*/ #if defined(PIPE_ARCH_SSE) - __m128d v0r, v1r, v2r; + __m128 v0r, v1r; __m128 vxy0xy2, vxy1xy0; __m128i vxy0xy2i, vxy1xy0i; __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; __m128 pix_offset = _mm_set1_ps(setup->pixel_offset); __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); - v0r = _mm_load_sd((const double *)v0[0]); - v1r = _mm_load_sd((const double *)v1[0]); - v2r = _mm_load_sd((const double *)v2[0]); - vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r)); - vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r)); + v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0])); + vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]); + v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0])); + vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2); vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset); vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset); vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 6ad9dd31681..75e5fd843c2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const case TYPE_S32: case TYPE_U32: return reg.data.s32 == i; // as if ... + case TYPE_S64: + case TYPE_U64: + return reg.data.s64 == i; // as if ... 
case TYPE_F32: return reg.data.f32 == static_cast<float>(i); case TYPE_F64: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d1fdd75495f..9d7becf27d4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -132,6 +132,7 @@ enum operation OP_SUBFM, // surface bitfield manipulation OP_SUCLAMP, // clamp surface coordinates OP_SUEAU, // surface effective address + OP_SUQ, // surface query OP_MADSP, // special integer multiply-add OP_TEXBAR, // texture dependency barrier OP_DFDX, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 17cb484d2ba..0c7cd1d8137 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1947,10 +1947,16 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_CEIL: case OP_FLOOR: case OP_TRUNC: - case OP_CVT: case OP_SAT: emitCVT(insn); break; + case OP_CVT: + if (insn->def(0).getFile() == FILE_PREDICATE || + insn->src(0).getFile() == FILE_PREDICATE) + emitMOV(insn); + else + emitCVT(insn); + break; case OP_RSQ: emitSFnOp(insn, 5 + 2 * insn->subOp); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 1fa0eb6da6d..dee26225b7e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -673,7 +673,12 @@ CodeEmitterGM107::emitMOV() (insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) { switch (insn->src(0).getFile()) { case FILE_GPR: - emitInsn(0x5c980000); + if (insn->def(0).getFile() == FILE_PREDICATE) { + emitInsn(0x5b6a0000); + emitGPR (0x08); + } else { + emitInsn(0x5c980000); + } emitGPR (0x14, insn->src(0)); break; case FILE_MEMORY_CONST: @@ -684,18 +689,32 @@ 
CodeEmitterGM107::emitMOV() emitInsn(0x38980000); emitIMMD(0x14, 19, insn->src(0)); break; + case FILE_PREDICATE: + emitInsn(0x50880000); + emitPRED(0x0c, insn->src(0)); + emitPRED(0x1d); + emitPRED(0x27); + break; default: assert(!"bad src file"); break; } - emitField(0x27, 4, insn->lanes); + if (insn->def(0).getFile() != FILE_PREDICATE && + insn->src(0).getFile() != FILE_PREDICATE) + emitField(0x27, 4, insn->lanes); } else { emitInsn (0x01000000); emitIMMD (0x14, 32, insn->src(0)); emitField(0x0c, 4, insn->lanes); } - emitGPR(0x00, insn->def(0)); + if (insn->def(0).getFile() == FILE_PREDICATE) { + emitPRED(0x27); + emitPRED(0x03, insn->def(0)); + emitPRED(0x00); + } else { + emitGPR(0x00, insn->def(0)); + } } void @@ -2684,11 +2703,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) emitRAM(); break; case OP_MOV: - if (insn->def(0).getFile() == FILE_GPR && - insn->src(0).getFile() != FILE_PREDICATE) - emitMOV(); - else - assert(!"R2P/P2R"); + emitMOV(); break; case OP_RDSV: emitS2R(); @@ -2700,7 +2715,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_CEIL: case OP_TRUNC: case OP_CVT: - if (isFloatType(insn->dType)) { + if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || + insn->src(0).getFile() == FILE_PREDICATE)) { + emitMOV(); + } else if (isFloatType(insn->dType)) { if (isFloatType(insn->sType)) emitF2F(); else diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 0b28047e22b..8637db91521 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2021,8 +2021,10 @@ CodeEmitterNVC0::emitATOM(const Instruction *i) code[0] |= 63 << 20; } - if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) - srcId(i->src(2), 32 + 17); + if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) { + assert(i->src(1).getSize() == 2 * typeSizeof(i->sType)); + code[1] |= (SDATA(i->src(1)).id + 1) << 17; + } } void @@ 
-2433,10 +2435,16 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_CEIL: case OP_FLOOR: case OP_TRUNC: - case OP_CVT: case OP_SAT: emitCVT(insn); break; + case OP_CVT: + if (insn->def(0).getFile() == FILE_PREDICATE || + insn->src(0).getFile() == FILE_PREDICATE) + emitMOV(insn); + else + emitCVT(insn); + break; case OP_RSQ: emitSFnOp(insn, 5 + 2 * insn->subOp); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 9c4a38f291b..52ac198221d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode); static nv50_ir::DataFile translateFile(uint file); static nv50_ir::TexTarget translateTexture(uint texTarg); static nv50_ir::SVSemantic translateSysVal(uint sysval); +static nv50_ir::CacheMode translateCacheMode(uint qualifier); class Instruction { @@ -213,6 +214,12 @@ public: nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; + nv50_ir::CacheMode getCacheMode() const { + if (!insn->Instruction.Memory) + return nv50_ir::CACHE_CA; + return translateCacheMode(insn->Memory.Qualifier); + } + inline uint getLabel() { return insn->Label.Label; } unsigned getSaturate() const { return insn->Instruction.Saturate; } @@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; - //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex) } } +static nv50_ir::CacheMode translateCacheMode(uint qualifier) +{ + if (qualifier & 
TGSI_MEMORY_VOLATILE) + return nv50_ir::CACHE_CV; + if (qualifier & TGSI_MEMORY_COHERENT) + return nv50_ir::CACHE_CG; + return nv50_ir::CACHE_CA; +} + nv50_ir::DataType Instruction::inferSrcType() const { switch (getOpcode()) { @@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_FILE_IMMEDIATE: case TGSI_FILE_PREDICATE: case TGSI_FILE_SAMPLER: + case TGSI_FILE_BUFFER: break; default: ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); @@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { if (insn.getDst(0).isIndirect(0)) indirectTempArrays.insert(insn.getDst(0).getArrayId()); + } else + if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) { + info->io.globalAccess |= 0x2; } } @@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else -/* - if (src.getFile() == TGSI_FILE_RESOURCE) { - if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + if (src.getFile() == TGSI_FILE_BUFFER) { + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; } else -*/ if (src.getFile() == TGSI_FILE_OUTPUT) { if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for @@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c) int idx = dst.getIndex(0); int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; - if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/) + if (dst.isMasked(c) || f == TGSI_FILE_BUFFER) return NULL; if (dst.isIndirect(0) || @@ -2222,6 +2239,28 @@ Converter::handleLOAD(Value *dst0[4]) int c; std::vector<Value *> off, src, ldv, def; + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + for (c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + + Value *off = fetchSrc(1, c); + Symbol *sym; + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { + off = NULL; + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c); + } else { + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + } + + Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); + ld->cache = tgsi.getCacheMode(); + if (tgsi.getSrc(0).isIndirect(0)) + ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + } + return; + } + getResourceCoords(off, r, 1); if (isResourceRaw(code, r)) { @@ -2298,6 +2337,30 @@ Converter::handleSTORE() int c; std::vector<Value *> off, src, dummy; + if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) { + for (c = 0; c < 4; ++c) { + if (!(tgsi.getDst(0).getMask() & (1 << c))) + continue; + + Symbol *sym; + Value *off; + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { + off = NULL; + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, + tgsi.getSrc(0).getValueU32(0, info) + 4 * c); + } else { + off = fetchSrc(0, 0); + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + } + + Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); + st->cache = tgsi.getCacheMode(); + if (tgsi.getDst(0).isIndirect(0)) + st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); + } + return; + } + getResourceCoords(off, r, 0); src = off; const int s = src.size(); @@ -2359,6 +2422,37 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector<Value *> defv; LValue *dst = getScratch(); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + for (int c = 0; c < 4; ++c) { + if 
(!dst0[c]) + continue; + + Instruction *insn; + Value *off = fetchSrc(1, c), *off2 = NULL; + Value *sym; + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); + else + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0); + if (tgsi.getSrc(0).isIndirect(0)) + off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c)); + else + insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); + if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) + insn->setIndirect(0, 0, off); + if (off2) + insn->setIndirect(0, 1, off2); + insn->subOp = subOp; + } + for (int c = 0; c < 4; ++c) + if (dst0[c]) + dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov + return; + } + + getResourceCoords(srcv, r, 1); if (isResourceSpecial(r)) { @@ -3103,6 +3197,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) geni->fixed = 1; geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); break; + case TGSI_OPCODE_MEMBAR: + geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); + geni->fixed = 1; + if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP) + geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA); + else + geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL); + break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: case TGSI_OPCODE_ATOMCAS: @@ -3115,6 +3217,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ATOMIMAX: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; + case TGSI_OPCODE_RESQ: + geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0], + makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0)); + if (tgsi.getSrc(0).isIndirect(0)) + geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); + break; case TGSI_OPCODE_IBFE: case TGSI_OPCODE_UBFE: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index dc1ab769b98..e7cb54bc426 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1022,11 +1022,22 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i) return true; } +bool +NVC0LoweringPass::handleSUQ(Instruction *suq) +{ + suq->op = OP_MOV; + suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1), + suq->getSrc(0)->reg.fileIndex * 16)); + suq->setIndirect(0, 0, NULL); + suq->setIndirect(0, 1, NULL); + return true; +} bool NVC0LoweringPass::handleATOM(Instruction *atom) { SVSemantic sv; + Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base; switch (atom->src(0).getFile()) { case FILE_MEMORY_LOCAL: @@ -1037,16 +1048,22 @@ NVC0LoweringPass::handleATOM(Instruction *atom) break; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); + assert(base->reg.size == 8); + if (ptr) + base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); + assert(base->reg.size == 8); + atom->setIndirect(0, 0, base); return true; } - Value *base = + base = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); - Value *ptr = atom->getIndirect(0, 0); atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); + atom->setIndirect(0, 1, NULL); atom->setIndirect(0, 0, base); return true; @@ -1069,7 +1086,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) cctl->setPredicate(cas->cc, cas->getPredicate()); } - if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { + if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { // CAS is crazy. 
It's 2nd source is a double reg, and the 3rd source // should be set to the high part of the double reg or bad things will // happen elsewhere in the universe. @@ -1079,6 +1096,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) bld.setPosition(cas, false); bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); cas->setSrc(1, dreg); + cas->setSrc(2, dreg); } return true; @@ -1094,6 +1112,32 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) } inline Value * +NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off) +{ + uint8_t b = prog->driver->io.resInfoCBSlot; + off += prog->driver->io.suInfoBase; + + if (ptr) + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); + + return bld. + mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr); +} + +inline Value * +NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off) +{ + uint8_t b = prog->driver->io.resInfoCBSlot; + off += prog->driver->io.suInfoBase; + + if (ptr) + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); + + return bld. 
+ mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr); +} + +inline Value * NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) { uint8_t b = prog->driver->io.msInfoCBSlot; @@ -1786,6 +1830,7 @@ NVC0LoweringPass::visit(Instruction *i) return handleRDSV(i); case OP_WRSV: return handleWRSV(i); + case OP_STORE: case OP_LOAD: if (i->src(0).getFile() == FILE_SHADER_INPUT) { if (prog->getType() == Program::TYPE_COMPUTE) { @@ -1820,6 +1865,26 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); i->op = OP_VFETCH; + } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { + Value *ind = i->getIndirect(0, 1); + Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); + // XXX come up with a way not to do this for EVERY little access but + // rather to batch these up somehow. Unfortunately we've lost the + // information about the field width by the time we get here. 
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); + Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16); + Value *pred = new_LValue(func, FILE_PREDICATE); + if (i->src(0).isIndirect(0)) { + bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); + bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); + } + i->setIndirect(0, 1, NULL); + i->setIndirect(0, 0, ptr); + bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); + i->setPredicate(CC_NOT_P, pred); + if (i->defExists(0)) { + bld.mkMov(i->getDef(0), bld.mkImm(0)); + } } break; case OP_ATOM: @@ -1838,6 +1903,9 @@ NVC0LoweringPass::visit(Instruction *i) if (targ->getChipset() >= NVISA_GK104_CHIPSET) handleSurfaceOpNVE4(i->asTex()); break; + case OP_SUQ: + handleSUQ(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index adb400a559a..09ec7e69ddc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -101,6 +101,7 @@ protected: bool handleTXQ(TexInstruction *); virtual bool handleManualTXD(TexInstruction *); bool handleTXLQ(TexInstruction *); + bool handleSUQ(Instruction *); bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); @@ -116,6 +117,8 @@ private: void readTessCoord(LValue *dst, int c); Value *loadResInfo32(Value *ptr, uint32_t off); + Value *loadResInfo64(Value *ptr, uint32_t off); + Value *loadResLength32(Value *ptr, uint32_t off); Value *loadMsInfo32(Value *ptr, uint32_t off); Value *loadTexHandle(Value *ptr, unsigned int slot); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 95e9fdfc57d..05b8db4a3d8 100644 --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -336,6 +336,7 @@ private: void expr(Instruction *, ImmediateValue&, ImmediateValue&); void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); void opnd(Instruction *, ImmediateValue&, int s); + void opnd3(Instruction *, ImmediateValue&); void unary(Instruction *, const ImmediateValue&); @@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb) else if (i->srcExists(1) && i->src(1).getImmediate(src1)) opnd(i, src1, 1); + if (i->srcExists(2) && i->src(2).getImmediate(src2)) + opnd3(i, src2); } return true; } @@ -873,6 +876,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, } void +ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) +{ + switch (i->op) { + case OP_MAD: + case OP_FMA: + if (imm2.isInteger(0)) { + i->op = OP_MUL; + i->setSrc(2, NULL); + foldCount++; + return; + } + break; + default: + return; + } +} + +void ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { const int t = !s; @@ -1202,6 +1223,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); } break; + case OP_SHR: + if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) { + bld.setPosition(i, false); + i->op = OP_AND; + i->setSrc(0, si->getSrc(0)); + i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1))); + } + break; case OP_MUL: int muls; if (isFloatType(si->dType)) @@ -2504,6 +2533,12 @@ MemoryOpt::runOpt(BasicBlock *bb) } } else if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { + if (typeSizeof(ldst->dType) == 4 && + ldst->src(1).getFile() == FILE_GPR && + ldst->getSrc(1)->getInsn()->op == OP_NOP) { + delete_Instruction(prog, ldst); + continue; + } isLoad = false; } else { // TODO: maybe have all fixed ops act as barrier ? 
@@ -3015,7 +3050,7 @@ Instruction::isResultEqual(const Instruction *that) const if (that->srcExists(s)) return false; - if (op == OP_LOAD || op == OP_VFETCH) { + if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) { switch (src(0).getFile()) { case FILE_MEMORY_CONST: case FILE_SHADER_INPUT: @@ -3046,6 +3081,8 @@ GlobalCSE::visit(BasicBlock *bb) ik = phi->getSrc(0)->getInsn(); if (!ik) continue; // probably a function input + if (ik->defCount(0xff) > 1) + continue; // too painful to check if we can really push this forward for (s = 1; phi->srcExists(s); ++s) { if (phi->getSrc(s)->refCount() > 1) break; @@ -3179,10 +3216,10 @@ DeadCodeElim::buryAll(Program *prog) bool DeadCodeElim::visit(BasicBlock *bb) { - Instruction *next; + Instruction *prev; - for (Instruction *i = bb->getFirst(); i; i = next) { - next = i->next; + for (Instruction *i = bb->getExit(); i; i = prev) { + prev = i->prev; if (i->isDead()) { ++deadCount; delete_Instruction(prog, i); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 0b02599dbdd..47285a25c33 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -161,6 +161,7 @@ const char *operationStr[OP_LAST + 1] = "subfm", "suclamp", "sueau", + "suq", "madsp", "texbar", "dfdx", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index cd8c42ced5e..de39be872e4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1544,6 +1544,9 @@ GCRA::cleanup(const bool success) delete[] nodes; nodes = NULL; + hi.next = hi.prev = &hi; + lo[0].next = lo[0].prev = &lo[0]; + lo[1].next = lo[1].prev = &lo[1]; } Symbol * diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 4390a726d1c..ae0a8bb61d1 100644 
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -46,7 +46,7 @@ const uint8_t Target::operationSrcNr[] = 1, 1, 1, // TEX, TXB, TXL, 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA - 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP + 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 0, // TEXBAR 1, 1, // DFDX, DFDY 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP @@ -109,8 +109,8 @@ const OpClass Target::operationClass[] = // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, - // SUBFM, SUCLAMP, SUEAU, MADSP - OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, + // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, // TEXBAR OPCLASS_OTHER, // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index a3d07deeb18..c6c287bb8bb 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -266,7 +266,9 @@ nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers, int i; for (i = 0; i < num_buffers; ++i) { +#ifndef NDEBUG assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]); +#endif memcpy(dec->bsp_ptr, data[i], num_bytes[i]); dec->bsp_ptr += num_bytes[i]; str_bsp->w0[0] += num_bytes[i]; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 61d91fd4cce..b62889119c5 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -184,6 +184,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, 
enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 888d62e1c52..a67ef28abf8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -369,7 +369,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } - FREE(info->bin.syms); prog->code = info->bin.code; prog->code_size = info->bin.codeSize; @@ -403,10 +402,13 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; } prog->gp.vert_count = info->prop.gp.maxVertices; - } else + } + if (prog->type == PIPE_SHADER_COMPUTE) { prog->cp.syms = info->bin.syms; prog->cp.num_syms = info->bin.numSyms; + } else { + FREE(info->bin.syms); } if (prog->pipe.stream_output.num_outputs) @@ -507,6 +509,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) FREE(p->interps); FREE(p->so); + if (type == PIPE_SHADER_COMPUTE) + FREE(p->cp.syms); + memset(p, 0, sizeof(*p)); p->pipe = pipe; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 32da60e0a23..14d0085975b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -227,6 +227,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 
0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 86be1b4c4ed..ec5cf376227 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -595,6 +595,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, } static void +nv50_clear_buffer_push(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv04_resource *buf = nv04_resource(res); + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + unsigned tmp, i; + + if (data_size == 1) { + tmp = *(unsigned char *)data; + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; + data = &tmp; + data_size = 4; + } else if (data_size == 2) { + tmp = *(unsigned short *)data; + tmp = (tmp << 16) | tmp; + data = &tmp; + data_size = 4; + } + + unsigned data_words = data_size / 4; + + nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + offset &= ~0xff; + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, size); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, xcoord); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + 
+ while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; + + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + } + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nv50->bufctx, 0); +} + +static void nv50_clear_buffer(struct pipe_context *pipe, struct pipe_resource *res, unsigned offset, unsigned size, @@ -643,9 +719,22 @@ nv50_clear_buffer(struct pipe_context *pipe, assert(size % data_size == 0); + if (offset & 0xff) { + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); + assert(fixup_size % data_size == 0); + nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size); + offset += fixup_size; + size -= fixup_size; + if (!size) + return; + } + elements = size / data_size; height = (elements + 8191) / 8192; width = elements / height; + if (height > 1) + width &= ~0xff; + assert(width > 0); BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); PUSH_DATAf(push, color.f[0]); @@ -669,13 +758,13 @@ nv50_clear_buffer(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); PUSH_DATA (push, nv50_format_table[dst_fmt].rt); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); + PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100)); PUSH_DATA (push, height); BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -694,26 +783,21 @@ nv50_clear_buffer(struct 
pipe_context *pipe, BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + if (width * height != elements) { offset += width * height * data_size; width = elements - width * height; - height = 1; - BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); - BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); - PUSH_DATA (push, height); - BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); - PUSH_DATA (push, 0x3c); + nv50_clear_buffer_push(pipe, res, offset, width * data_size, + data, data_size); } - BEGIN_NV04(push, NV50_3D(COND_MODE), 1); - PUSH_DATA (push, nv50->cond_condmode); - - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 4daa57d47bb..7f76ec66edb 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -491,3 +491,52 @@ daic_runout: daic_runout_check: branz annul $r7 #daic_runout bra annul #daic_restore + +/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE: + * + * This is a combination macro for all of our query buffer object needs. + * It has the option to clamp results to a configurable amount, as well as + * to write out one or two words. + * + * We use the query engine to write out the values, and expect the query + * address to point to the right place. + * + * arg = clamp value (0 means unclamped). 
clamped means just 1 written value. + * parm[0] = LSB of end value + * parm[1] = MSB of end value + * parm[2] = LSB of start value + * parm[3] = MSB of start value + * parm[4] = desired sequence + * parm[5] = actual sequence + */ +.section #mme9097_query_buffer_write + parm $r2 + parm $r3 + parm $r4 + parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */ + parm $r6 + parm $r7 + mov $r6 (sub $r7 $r6) /* actual - desired */ + mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */ + braz annul $r6 #qbw_ready + exit +qbw_ready: + mov $r2 (sub $r2 $r4) + braz $r1 #qbw_postclamp + mov $r3 (sbb $r3 $r5) + branz annul $r3 #qbw_clamp + mov $r4 (sub $r1 $r2) + mov $r4 (sbb 0x0 0x0) + braz annul $r4 #qbw_postclamp +qbw_clamp: + mov $r2 $r1 +qbw_postclamp: + send $r2 + mov $r4 0x1000 + branz annul $r1 #qbw_done + send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + maddr 0x16c2 /* QUERY_SEQUENCE */ + send $r3 +qbw_done: + exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + nop diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index bf8625e0584..ecadf7e4d29 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = { 0xfffef837, 0xfffdc027, }; + +uint32_t mme9097_query_buffer_write[] = { + 0x00000201, + 0x00000301, +/* 0x000a: qbw_ready */ + 0x00000401, + 0x05b08551, +/* 0x0011: qbw_clamp */ +/* 0x0012: qbw_postclamp */ + 0x00000601, + 0x00000701, +/* 0x0018: qbw_done */ + 0x0005be10, + 0x00060610, + 0x0000b027, + 0x00000091, + 0x00051210, + 0x0001c807, + 0x00075b10, + 0x00011837, + 0x00048c10, + 0x00060410, + 0x0000a027, + 0x00000a11, + 0x00001041, + 0x04000411, + 0x00010837, + 0x84010042, + 0x05b08021, + 0x00001841, + 0x840100c2, + 0x00000011, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 162661ff2a7..547b8f5d309 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -56,6 +56,7 @@ static void nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags) { struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; int i, s; if (flags & PIPE_BARRIER_MAPPED_BUFFER) { @@ -90,6 +91,9 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags) } } } + if (flags & PIPE_BARRIER_SHADER_BUFFER) { + IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011); + } } static void @@ -122,6 +126,10 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) pipe_surface_reference(&nvc0->surfaces[s][i], NULL); } + for (s = 0; s < 6; ++s) + for (i = 0; i < NVC0_MAX_BUFFERS; ++i) + pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL); + for (i = 0; i < nvc0->num_tfbbufs; ++i) pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); @@ -180,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); - unsigned bind = res->bind ? 
res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (bind & PIPE_BIND_RENDER_TARGET) { + if (res->bind & PIPE_BIND_RENDER_TARGET) { for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && nvc0->framebuffer.cbufs[i]->texture == res) { @@ -194,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -204,12 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER | - PIPE_BIND_SAMPLER_VIEW)) { + if (res->target == PIPE_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { nvc0->dirty |= NVC0_NEW_ARRAYS; @@ -253,6 +255,18 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } + + for (s = 0; s < 5; ++s) { + for (i = 0; i < NVC0_MAX_BUFFERS; ++i) { + if (nvc0->buffers[s][i].buffer == res) { + nvc0->buffers_dirty[s] |= 1 << i; + nvc0->dirty |= NVC0_NEW_BUFFERS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); + if (!--ref) + return ref; + } + } + } } return ref; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 12195489691..4ab2ac41183 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -56,6 +56,7 @@ #define NVC0_NEW_SURFACES (1 << 23) #define NVC0_NEW_MIN_SAMPLES (1 << 24) #define NVC0_NEW_TESSFACTOR (1 << 25) +#define NVC0_NEW_BUFFERS (1 << 26) #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) @@ -73,9 +74,10 @@ #define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i)) #define NVC0_BIND_TFB 244 #define NVC0_BIND_SUF 245 -#define 
NVC0_BIND_SCREEN 246 -#define NVC0_BIND_TLS 247 -#define NVC0_BIND_3D_COUNT 248 +#define NVC0_BIND_BUF 246 +#define NVC0_BIND_SCREEN 247 +#define NVC0_BIND_TLS 249 +#define NVC0_BIND_3D_COUNT 250 /* compute bufctx (during launch_grid) */ #define NVC0_BIND_CP_CB(i) ( 0 + (i)) @@ -187,10 +189,15 @@ struct nvc0_context { struct nvc0_blitctx *blit; + /* NOTE: some of these surfaces may reference buffers */ struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS]; uint16_t surfaces_dirty[2]; uint16_t surfaces_valid[2]; + struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS]; + uint32_t buffers_dirty[6]; + uint32_t buffers_valid[6]; + struct util_dynarray global_residents; }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 27c026b8b30..49e176cbd49 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -33,4 +33,6 @@ #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850 +#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index c3b53621630..93f211bd5fc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, } info->io.resInfoCBSlot = 15; info->io.sampleInfoBase = 256 + 128; + info->io.suInfoBase = 512; info->io.msInfoCBSlot = 15; info->io.msInfoBase = 0; /* TODO */ } @@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, } */ if (info->io.globalAccess) + prog->hdr[0] |= 1 << 26; + if (info->io.globalAccess & 0x2) prog->hdr[0] |= 1 << 16; if (info->io.fp64) prog->hdr[0] |= 1 << 27; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 7497317c419..d2acce7d5be 
100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, } static void +nvc0_get_query_result_resource(struct pipe_context *pipe, + struct pipe_query *pq, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct nvc0_query *q = nvc0_query(pq); + if (!q->funcs->get_query_result_resource) { + assert(!"Unexpected lack of get_query_result_resource"); + return; + } + q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type, + index, resource, offset); +} + +static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, uint mode) @@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe, } if (wait) - nvc0_hw_query_fifo_wait(push, q); + nvc0_hw_query_fifo_wait(nvc0, q); PUSH_SPACE(push, 7); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->begin_query = nvc0_begin_query; pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; + pipe->get_query_result_resource = nvc0_get_query_result_resource; pipe->render_condition = nvc0_render_condition; nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index c46361c31aa..a887b220557 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -14,6 +14,13 @@ struct nvc0_query_funcs { void (*end_query)(struct nvc0_context *, struct nvc0_query *); boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *, boolean, union pipe_query_result *); + void (*get_query_result_resource)(struct nvc0_context *nvc0, + struct nvc0_query *q, + boolean wait, + enum 
pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset); }; struct nvc0_query { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 1bed0162baf..62385884137 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, return true; } +static void +nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, + struct nvc0_query *q, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_hw_query *hq = nvc0_hw_query(q); + struct nv04_resource *buf = nv04_resource(resource); + unsigned stride; + + assert(!hq->funcs || !hq->funcs->get_query_result); + + if (index == -1) { + /* TODO: Use a macro to write the availability of the query */ + if (hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_update(nvc0->screen->base.client, q); + uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY}; + nvc0->base.push_cb(&nvc0->base, buf, offset, + result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1, + ready); + return; + } + + /* If the fence guarding this query has not been emitted, that makes a lot + * of the following logic more complicated. + */ + if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED) + nouveau_fence_emit(hq->fence); + + /* We either need to compute a 32- or 64-bit difference between 2 values, + * and then store the result as either a 32- or 64-bit value. As such let's + * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit + * ones), and have one macro that clamps result to i32, u32, or just + * outputs the difference (no need to worry about 64-bit clamping). 
+ */ + if (hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_update(nvc0->screen->base.client, q); + + if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_fifo_wait(nvc0, q); + + nouveau_pushbuf_space(push, 16, 2, 0); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7); + if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */ + PUSH_DATA(push, 0x00000001); + else if (result_type == PIPE_QUERY_TYPE_I32) + PUSH_DATA(push, 0x7fffffff); + else if (result_type == PIPE_QUERY_TYPE_U32) + PUSH_DATA(push, 0xffffffff); + else + PUSH_DATA(push, 0x00000000); + + switch (q->type) { + case PIPE_QUERY_SO_STATISTICS: + stride = 2; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + stride = 12; + break; + default: + assert(index == 0); + stride = 1; + break; + } + + if (hq->is64bit) { + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index, + 8 | NVC0_IB_ENTRY_1_NO_PREFETCH); + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride), + 8 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } else { + nouveau_pushbuf_data(push, hq->bo, hq->offset + 4, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + PUSH_DATA(push, 0); + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + PUSH_DATA(push, 0); + } + + if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) { + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + } else if (hq->is64bit) { + PUSH_DATA(push, hq->fence->sequence); + nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } else { + PUSH_DATA(push, hq->sequence); + nouveau_pushbuf_data(push, hq->bo, hq->offset, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, 
&buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } +} + static const struct nvc0_query_funcs hw_query_funcs = { .destroy_query = nvc0_hw_destroy_query, .begin_query = nvc0_hw_begin_query, .end_query = nvc0_hw_end_query, .get_query_result = nvc0_hw_get_query_result, + .get_query_result_resource = nvc0_hw_get_query_result_resource, }; struct nvc0_query * @@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, } void -nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q) +nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q) { + struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_hw_query *hq = nvc0_hw_query(q); unsigned offset = hq->offset; @@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q) PUSH_SPACE(push, 5); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); - PUSH_DATAh(push, hq->bo->offset + offset); - PUSH_DATA (push, hq->bo->offset + offset); - PUSH_DATA (push, hq->sequence); + if (hq->is64bit) { + PUSH_DATAh(push, nvc0->screen->fence.bo->offset); + PUSH_DATA (push, nvc0->screen->fence.bo->offset); + PUSH_DATA (push, hq->fence->sequence); + } else { + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + } PUSH_DATA (push, (1 << 12) | NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h index 3701eb7100f..8225755d85e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h @@ -51,6 +51,6 @@ void nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *, unsigned); void -nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *); +nvc0_hw_query_fifo_wait(struct 
nvc0_context *, struct nvc0_query *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 84dbd69b8a5..d368fda707d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 1; /* 256 for binding as RT, but that's not possible in GL */ + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 16; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return NOUVEAU_MIN_BUFFER_MAP_ALIGN; case PIPE_CAP_MAX_VIEWPORTS: @@ -189,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; @@ -212,10 +215,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_VENDOR_ID: @@ -322,8 +327,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: return 0; + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return NVC0_MAX_BUFFERS; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* would be 32 in linked (OpenGL-style) mode */ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: @@ -676,8 +682,9 @@ nvc0_screen_create(struct nouveau_device *dev) push->rsvd_kick = 5; screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_SHADER_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_COMMAND_ARGS_BUFFER; + PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; @@ -891,9 +898,9 @@ nvc0_screen_create(struct nouveau_device *dev) /* TIC and TSC entries for each unit (nve4+ only) */ /* auxiliary constants (6 user clip planes, base instance id) */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); 
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { @@ -913,8 +920,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 256); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); PUSH_DATA (push, 0); PUSH_DATAf(push, 0.0f); @@ -922,8 +929,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); if (screen->base.drm->version >= 0x01000101) { ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); @@ -953,8 +960,12 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); + /* Reduce likelihood of collision with real buffers by placing the hole at + * the top of the 4G area. This will have to be dealt with for real + * eventually by blocking off that area from the VM. 
+ */ BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); - PUSH_DATA (push, 0); + PUSH_DATA (push, 0xff << 24); if (screen->eng3d->oclass < GM107_3D_CLASS) { ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL, @@ -1039,6 +1050,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); + MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 8b73102b98b..1a56177815c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -22,6 +22,8 @@ #define NVC0_MAX_VIEWPORTS 16 +#define NVC0_MAX_BUFFERS 32 + struct nvc0_context; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index dc02b011bdf..382a18ef153 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) continue; if (!targ->clean) - nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq)); + nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq)); nouveau_pushbuf_space(push, 0, 0, 1); BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 24a6c222dd5..cf3d3497c78 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -1243,11 +1243,50 @@ nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader, unsigned start_slot, 
unsigned count, struct pipe_image_view **views) { -#if 0 - nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views); +} + +static void +nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t, + unsigned start, unsigned nr, + struct pipe_shader_buffer *pbuffers) +{ + const unsigned end = start + nr; + const unsigned mask = ((1 << nr) - 1) << start; + unsigned i; + + assert(t < 5); + + if (pbuffers) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (pbuffers[p].buffer) + nvc0->buffers_valid[t] |= (1 << i); + else + nvc0->buffers_valid[t] &= ~(1 << i); + nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset; + nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size; + pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer); + } + } else { + for (i = start; i < end; ++i) + pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL); + nvc0->buffers_valid[t] &= ~mask; + } + nvc0->buffers_dirty[t] |= mask; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); +} + +static void +nvc0_set_shader_buffers(struct pipe_context *pipe, + unsigned shader, + unsigned start, unsigned nr, + struct pipe_shader_buffer *buffers) +{ + const unsigned s = nvc0_shader_stage(shader); + nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers); - nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES; -#endif + nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS; } static inline void @@ -1377,6 +1416,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_global_binding = nvc0_set_global_bindings; pipe->set_compute_resources = nvc0_set_compute_resources; pipe->set_shader_images = nvc0_set_shader_images; + pipe->set_shader_buffers = nvc0_set_shader_buffers; nvc0->sample_mask = ~0; nvc0->min_samples = 1; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index b02a590c375..c17223a1b2b 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -183,9 +183,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0) ms = 1 << ms_mode; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9)); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); PUSH_DATA (push, 256 + 128); for (i = 0; i < ms; i++) { @@ -317,9 +317,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) struct nouveau_bo *bo = nvc0->screen->uniform_bo; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); - PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9)); - PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9)); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10)); + PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); PUSH_DATA (push, 256); PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); @@ -471,6 +471,39 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } static void +nvc0_validate_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + int i, s; + + for (s = 0; s < 5; s++) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); + PUSH_DATA (push, 512); + for (i = 0; i < NVC0_MAX_BUFFERS; i++) { + if (nvc0->buffers[s][i].buffer) { + struct nv04_resource *res = + 
nv04_resource(nvc0->buffers[s][i].buffer); + PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); + PUSH_DATA (push, 0); + BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } + } + +} + +static void nvc0_validate_sample_mask(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; @@ -663,6 +696,7 @@ static struct state_validate { { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, + { nvc0_validate_buffers, NVC0_NEW_BUFFERS }, { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }, { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES }, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 4e43c4e99fd..71726d1aa59 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -357,27 +357,132 @@ nvc0_clear_render_target(struct pipe_context *pipe, } static void -nvc0_clear_buffer_cpu(struct pipe_context *pipe, - struct pipe_resource *res, - unsigned offset, unsigned size, - const void *data, int data_size) +nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) { + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nv04_resource *buf = nv04_resource(res); - struct pipe_transfer *pt; - struct pipe_box box; - unsigned elements, i; + unsigned i; - elements = size / data_size; + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + 
nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); - u_box_1d(offset, size, &box); + unsigned count = (size + 3) / 4; + unsigned data_words = data_size / 4; - uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE, - &box, &pt); + while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; - for (i = 0; i < elements; ++i) - memcpy(&map[i*data_size], data, data_size); + if (!PUSH_SPACE(push, nr + 9)) + break; - buf->vtbl->transfer_unmap(pipe, pt); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_NIC0(push, NVC0_M2MF(DATA), nr); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + offset += nr * 4; + size -= nr * 4; + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_buffer_push_nve4(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(res); + unsigned i; + + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + unsigned count = (size + 3) / 4; + unsigned data_words = data_size / 4; + + while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / 
data_words; + unsigned nr = nr_data * data_words; + + if (!PUSH_SPACE(push, nr + 10)) + break; + + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + offset += nr * 4; + size -= nr * 4; + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_buffer_push(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned tmp; + + if (data_size == 1) { + tmp = *(unsigned char *)data; + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; + data = &tmp; + data_size = 4; + } else if (data_size == 2) { + tmp = *(unsigned short *)data; + tmp = (tmp << 16) | tmp; + data = &tmp; + data_size = 4; + } + + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) + nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size); + else + nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size); } static void @@ -402,10 +507,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, memcpy(&color.ui, data, 16); break; case 12: - /* This doesn't work, RGB32 is not a valid RT format. - * dst_fmt = PIPE_FORMAT_R32G32B32_UINT; - * memcpy(&color.ui, data, 12); - * memset(&color.ui[3], 0, 4); + /* RGB32 is not a valid RT format. This will be handled by the pushbuf + * uploader. 
*/ break; case 8: @@ -437,14 +540,26 @@ nvc0_clear_buffer(struct pipe_context *pipe, assert(size % data_size == 0); if (data_size == 12) { - /* TODO: Find a way to do this with the GPU! */ - nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size); + nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size); return; } + if (offset & 0xff) { + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); + assert(fixup_size % data_size == 0); + nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size); + offset += fixup_size; + size -= fixup_size; + if (!size) + return; + } + elements = size / data_size; height = (elements + 16383) / 16384; width = elements / height; + if (height > 1) + width &= ~0xff; + assert(width > 0); if (!PUSH_SPACE(push, 40)) return; @@ -465,7 +580,7 @@ nvc0_clear_buffer(struct pipe_context *pipe, BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9); PUSH_DATAh(push, buf->address + offset); PUSH_DATA (push, buf->address + offset); - PUSH_DATA (push, width * data_size); + PUSH_DATA (push, align(width * data_size, 0x100)); PUSH_DATA (push, height); PUSH_DATA (push, nvc0_format_table[dst_fmt].rt); PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR); @@ -480,24 +595,20 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } + if (width * height != elements) { offset += width * height * data_size; width = elements - width * height; - height = 1; - - BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4); - PUSH_DATAh(push, buf->address + offset); - PUSH_DATA (push, buf->address + offset); - PUSH_DATA (push, width * data_size); - PUSH_DATA (push, height); - - IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); + nvc0_clear_buffer_push(pipe, res, offset, width * data_size, + data, 
data_size); } - IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); - - nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); - nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 74090ce40a5..7223f5aecfb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -515,12 +515,12 @@ nve4_set_tex_handles(struct nvc0_context *nvc0) return; address = nvc0->screen->uniform_bo->offset + (5 << 16); - for (s = 0; s < 5; ++s, address += (1 << 9)) { + for (s = 0; s < 5; ++s, address += (1 << 10)) { uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; if (!dirty) continue; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); + PUSH_DATA (push, 1024); PUSH_DATAh(push, address); PUSH_DATA (push, address); do { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index ad79d1cbb9c..44aed1adeeb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -334,7 +334,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) b = ve->pipe.vertex_buffer_index; vb = &nvc0->vtxbuf[b]; - if (!vb->buffer) { + if (nvc0->vbo_user & (1 << b)) { if (!(nvc0->constant_vbos & (1 << b))) { if (ve->pipe.instance_divisor) { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1); @@ -352,13 +352,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) if (unlikely(ve->pipe.instance_divisor)) { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); - PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); PUSH_DATA (push, ve->pipe.instance_divisor); } else { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 
3); - PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); } @@ -382,7 +382,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) unsigned b; const uint32_t mask = nvc0->vbo_user; - PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements); for (b = 0; b < nvc0->num_vtxbufs; ++b) { struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; struct nv04_resource *buf; @@ -395,6 +395,10 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) } /* address/value set in nvc0_update_user_vbufs_shared */ continue; + } else if (!vb->buffer) { + /* there can be holes in the vertex buffer lists */ + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0); + continue; } buf = nv04_resource(vb->buffer); offset = vb->buffer_offset; @@ -410,6 +414,12 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); } + /* If there are more elements than buffers, we might not have unset + * fetching on the later elements. 
+ */ + for (; b < nvc0->vertex->num_elements; ++b) + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0); + if (nvc0->vbo_user) nvc0_update_user_vbufs_shared(nvc0); } @@ -680,7 +690,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push, if (count & 1) { count--; - PUSH_SPACE(push, 1); + PUSH_SPACE(push, 2); BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); PUSH_DATA (push, *map++); } @@ -779,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0, res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; PUSH_SPACE(push, 2); IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); - nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq)); + nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq)); if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS) IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); @@ -811,6 +821,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) unsigned size, macro, count = info->indirect_count, drawid = info->drawid; uint32_t offset = buf->offset + info->indirect_offset; + PUSH_SPACE(push, 7); + /* must make FIFO wait for engines idle before continuing to process */ if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) || (buf_count && buf_count->fence_wr && @@ -951,6 +963,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (info->mode == PIPE_PRIM_PATCHES && nvc0->state.patch_vertices != info->vertices_per_patch) { nvc0->state.patch_vertices = info->vertices_per_patch; + PUSH_SPACE(push, 1); IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices); } @@ -958,6 +971,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0_state_validate(nvc0, ~0, 8); if (nvc0->vertprog->vp.need_draw_parameters) { + PUSH_SPACE(push, 9); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); @@ -979,6 +993,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } 
if (nvc0->cb_dirty) { + PUSH_SPACE(push, 1); IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011); nvc0->cb_dirty = false; } @@ -987,6 +1002,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!nvc0->textures_coherent[s]) continue; + PUSH_SPACE(push, nvc0->num_textures[s] * 2); + for (int i = 0; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); if (!(nvc0->textures_coherent[s] & (1 << i))) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 90c4f71a945..a2b7f87855d 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -210,6 +210,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; /* SWTCL-only features. 
*/ diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 20945ece155..2cf08897a8d 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -225,7 +225,7 @@ void *evergreen_create_compute_state( } } #else - memset(&shader->binary, 0, sizeof(shader->binary)); + radeon_shader_binary_init(&shader->binary); radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); @@ -245,13 +245,31 @@ void *evergreen_create_compute_state( return shader; } -void evergreen_delete_compute_state(struct pipe_context *ctx, void* state) +void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) { - struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state; + struct r600_context *ctx = (struct r600_context *)ctx_; + COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n"); + struct r600_pipe_compute *shader = state; if (!shader) return; +#ifdef HAVE_OPENCL +#if HAVE_LLVM < 0x0306 + for (unsigned i = 0; i < shader->num_kernels; i++) { + struct r600_kernel *kernel = &shader->kernels[i]; + LLVMDisposeModule(module); + } + FREE(shader->kernels); + LLVMContextDispose(shader->llvm_ctx); +#else + radeon_shader_binary_clean(&shader->binary); + r600_destroy_shader(&shader->bc); + + /* TODO destroy shader->code_bo, shader->const_bo + * we'll need something like r600_buffer_free */ +#endif +#endif FREE(shader); } @@ -349,7 +367,7 @@ static void evergreen_emit_direct_dispatch( struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; unsigned num_waves; - unsigned num_pipes = rctx->screen->b.info.r600_max_pipes; + unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; unsigned wave_divisor = (16 * num_pipes); int group_size = 1; int grid_size = 1; @@ -723,7 +741,7 @@ static void evergreen_set_global_binding( * command stream by the 
start_cs_cmd atom. However, since the SET_CONTEXT_REG * packet requires that the shader type bit be set, we must initialize all * context registers needed for compute in this function. The registers - * intialized by the start_cs_cmd atom can be found in evereen_state.c in the + * initialized by the start_cs_cmd atom can be found in evergreen_state.c in the * functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending * on the GPU family. */ @@ -733,7 +751,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) int num_threads; int num_stack_entries; - /* since all required registers are initialised in the + /* since all required registers are initialized in the * start_compute_cs_cmd atom, we can EMIT_EARLY here. */ r600_init_command_buffer(cb, 256); @@ -818,7 +836,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) * R_008E28_SQ_STATIC_THREAD_MGMT3 */ - /* XXX: We may need to adjust the thread and stack resouce + /* XXX: We may need to adjust the thread and stack resource * values for 3D/compute interop */ r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9dfb84965cf..61d32c06671 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -772,7 +772,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, if (util_format_get_blocksize(pipe_format) >= 16) non_disp_tiling = 1; } - nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks); + nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); if (state->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; @@ -986,7 +986,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, unsigned block_size = align(util_format_get_blocksize(pipe_buffer->format), 4); unsigned pitch_alignment = - MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size); + MAX2(64, 
rctx->screen->b.info.pipe_interleave_bytes / block_size); unsigned pitch = align(pipe_buffer->width0, pitch_alignment); /* XXX: This is copied from evergreen_init_color_surface(). I don't @@ -1098,7 +1098,7 @@ void evergreen_init_color_surface(struct r600_context *rctx, if (util_format_get_blocksize(surf->base.format) >= 16) non_disp_tiling = 1; } - nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks); + nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); desc = util_format_description(surf->base.format); for (i = 0; i < 4; i++) { if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { @@ -1253,7 +1253,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx, macro_aspect = eg_macro_tile_aspect(macro_aspect); bankw = eg_bank_wh(bankw); bankh = eg_bank_wh(bankh); - nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks); + nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); offset >>= 8; surf->db_z_info = S_028040_ARRAY_MODE(array_mode) | @@ -3467,7 +3467,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, sub_cmd = EG_DMA_COPY_TILED; lbpp = util_logbase2(bpp); pitch_tile_max = ((pitch / bpp) / 8) - 1; - nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks); + nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks); if (dst_mode == RADEON_SURF_MODE_LINEAR) { /* T2L */ @@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context *rctx) unsigned id = 1; unsigned i; /* !!! - * To avoid GPU lockup registers must be emited in a specific order + * To avoid GPU lockup registers must be emitted in a specific order * (no kidding ...). The order below is important and have been - * partialy infered from analyzing fglrx command stream. + * partially inferred from analyzing fglrx command stream. * * Don't reorder atom without carefully checking the effect (GPU lockup * or piglit regression). 
@@ -3793,7 +3793,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe unsigned output_patch0_offset, perpatch_output_offset, lds_size; uint32_t values[16]; unsigned num_waves; - unsigned num_pipes = rctx->screen->b.info.r600_max_pipes; + unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; unsigned wave_divisor = (16 * num_pipes); *num_patches = 1; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 0b78290295a..1629399d8fe 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -245,8 +245,8 @@ struct r600_bytecode { unsigned ar_chan; unsigned ar_handling; unsigned r6xx_nop_after_rel_dst; - bool index_loaded[2]; - unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */ + bool index_loaded[2]; + unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */ unsigned debug_id; struct r600_isa* isa; }; diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 8b91372f3ae..0fe7c74418d 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -848,6 +848,7 @@ LLVMModuleRef r600_tgsi_llvm( lp_build_tgsi_llvm(bld_base, tokens); + LLVMBuildRetVoid(bld_base->base.gallivm->builder); radeon_llvm_finalize_module(ctx); return ctx->gallivm.module; @@ -910,6 +911,11 @@ unsigned r600_create_shader(struct r600_bytecode *bc, return 0; } +void r600_destroy_shader(struct r600_bytecode *bc) +{ + FREE(bc->bytecode); +} + unsigned r600_llvm_compile( LLVMModuleRef mod, enum radeon_family family, @@ -922,17 +928,14 @@ unsigned r600_llvm_compile( struct radeon_shader_binary binary; const char * gpu_family = r600_get_llvm_processor_name(family); - memset(&binary, 0, sizeof(struct radeon_shader_binary)); + radeon_shader_binary_init(&binary); if (dump) LLVMDumpModule(mod); r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug); r = r600_create_shader(bc, &binary, use_kill); - 
FREE(binary.code); - FREE(binary.config); - FREE(binary.rodata); - FREE(binary.global_symbol_offsets); + radeon_shader_binary_clean(&binary); return r; } diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h index f570b739fbe..3f7fc4bef7e 100644 --- a/src/gallium/drivers/r600/r600_llvm.h +++ b/src/gallium/drivers/r600/r600_llvm.h @@ -30,6 +30,8 @@ unsigned r600_create_shader(struct r600_bytecode *bc, const struct radeon_shader_binary *binary, boolean *use_kill); +void r600_destroy_shader(struct r600_bytecode *bc); + void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, struct r600_bytecode *bc, uint64_t symbol_offset, diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9b0f31270df..9d378013be0 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -285,6 +285,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 1; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: @@ -342,6 +344,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* kernel command checker support is also required */ return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41; + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return family >= CHIP_CEDAR ? 0 : 1; + /* Unsupported features. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -364,6 +369,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: @@ -415,10 +421,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Timer queries, present when the clock frequency is non zero. */ case PIPE_CAP_QUERY_TIME_ELAPSED: - return rscreen->b.info.r600_clock_crystal_freq != 0; + return rscreen->b.info.clock_crystal_freq != 0; case PIPE_CAP_QUERY_TIMESTAMP: return rscreen->b.info.drm_minor >= 20 && - rscreen->b.info.r600_clock_crystal_freq != 0; + rscreen->b.info.clock_crystal_freq != 0; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c index 18d2b69afb0..0c928345773 100644 --- a/src/gallium/drivers/r600/r600_uvd.c +++ b/src/gallium/drivers/r600/r600_uvd.c @@ -160,7 +160,7 @@ static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_ struct r600_texture *chroma = (struct r600_texture *)buf->resources[1]; msg->body.decode.dt_field_mode = buf->base.interlaced; - msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.tiling_info.num_banks)); + msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks)); ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface); diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index c7984c47304..b384baa9237 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -181,7 +181,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen, old_buf = res->buf; 
res->buf = new_buf; /* should be atomic */ - if (rscreen->info.r600_virtual_address) + if (rscreen->info.has_virtual_memory) res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf); else res->gpu_address = 0; @@ -511,7 +511,7 @@ r600_buffer_from_user_memory(struct pipe_screen *screen, return NULL; } - if (rscreen->info.r600_virtual_address) + if (rscreen->info.has_virtual_memory) rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf); else diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index caf7deef37c..ff5b055448a 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -60,7 +60,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx, enum radeon_bo_priority priority) { struct radeon_winsys_cs *cs = ring->cs; - bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address; + bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory; unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); if (!has_vm) { diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c index fad7bdec40a..f3529a1fe0f 100644 --- a/src/gallium/drivers/radeon/r600_perfcounter.c +++ b/src/gallium/drivers/radeon/r600_perfcounter.c @@ -33,10 +33,6 @@ /* Max counters per HW block */ #define R600_QUERY_MAX_COUNTERS 16 -static const char * const r600_pc_shader_suffix[] = { - "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS" -}; - static struct r600_perfcounter_block * lookup_counter(struct r600_perfcounters *pc, unsigned index, unsigned *base_gid, unsigned *sub_index) @@ -92,6 +88,8 @@ struct r600_pc_counter { unsigned stride; }; +#define R600_PC_SHADERS_WINDOWING (1 << 31) + struct r600_query_pc { struct r600_query_hw b; @@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, if (block->flags & 
R600_PC_BLOCK_SHADER) { unsigned sub_gids = block->num_instances; unsigned shader_id; - unsigned shader_mask; - unsigned query_shader_mask; + unsigned shaders; + unsigned query_shaders; if (block->flags & R600_PC_BLOCK_SE_GROUPS) sub_gids = sub_gids * screen->info.max_se; shader_id = sub_gid / sub_gids; sub_gid = sub_gid % sub_gids; - if (shader_id == 0) - shader_mask = R600_PC_SHADER_ALL; - else - shader_mask = 1 << (shader_id - 1); + shaders = screen->perfcounters->shader_type_bits[shader_id]; - query_shader_mask = query->shaders & R600_PC_SHADER_ALL; - if (query_shader_mask && query_shader_mask != shader_mask) { + query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; + if (query_shaders && query_shaders != shaders) { fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); FREE(group); return NULL; } - query->shaders |= shader_mask; + query->shaders = shaders; } - if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) { + if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { // A non-zero value in query->shaders ensures that the shader // masking is reset unless the user explicitly requests one. 
- query->shaders |= R600_PC_SHADER_WINDOWING; + query->shaders = R600_PC_SHADERS_WINDOWING; } if (block->flags & R600_PC_BLOCK_SE_GROUPS) { @@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, } if (query->shaders) { - if ((query->shaders & R600_PC_SHADER_ALL) == 0) - query->shaders |= R600_PC_SHADER_ALL; + if (query->shaders == R600_PC_SHADERS_WINDOWING) + query->shaders = 0xffffffff; query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; } @@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen, if (block->flags & R600_PC_BLOCK_SE_GROUPS) groups_se = screen->info.max_se; if (block->flags & R600_PC_BLOCK_SHADER) - groups_shader = ARRAY_SIZE(r600_pc_shader_suffix); + groups_shader = screen->perfcounters->num_shader_types; namelen = strlen(block->basename); block->group_name_stride = namelen + 1; @@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen, groupname = block->group_names; for (i = 0; i < groups_shader; ++i) { - unsigned shaderlen = strlen(r600_pc_shader_suffix[i]); + const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; + unsigned shaderlen = strlen(shader_suffix); for (j = 0; j < groups_se; ++j) { for (k = 0; k < groups_instance; ++k) { strcpy(groupname, block->basename); p = groupname + namelen; if (block->flags & R600_PC_BLOCK_SHADER) { - strcpy(p, r600_pc_shader_suffix[i]); + strcpy(p, shader_suffix); p += shaderlen; } @@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen, if (block->flags & R600_PC_BLOCK_SE_GROUPS) block->num_groups *= rscreen->info.max_se; if (block->flags & R600_PC_BLOCK_SHADER) - block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix); + block->num_groups *= pc->num_shader_types; ++pc->num_blocks; pc->num_groups += block->num_groups; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 4c066c14cd8..d75317b1cbe 
100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -49,6 +49,26 @@ struct r600_multi_fence { }; /* + * shader binary helpers. + */ +void radeon_shader_binary_init(struct radeon_shader_binary *b) +{ + memset(b, 0, sizeof(*b)); +} + +void radeon_shader_binary_clean(struct radeon_shader_binary *b) +{ + if (!b) + return; + FREE(b->code); + FREE(b->config); + FREE(b->rodata); + FREE(b->global_symbol_offsets); + FREE(b->relocs); + FREE(b->disasm_string); +} + +/* * pipe_context */ @@ -251,7 +271,7 @@ bool r600_common_context_init(struct r600_common_context *rctx, rctx->chip_class = rscreen->chip_class; if (rscreen->chip_class >= CIK) - rctx->max_db = MAX2(8, rscreen->info.r600_num_backends); + rctx->max_db = MAX2(8, rscreen->info.num_render_backends); else if (rscreen->chip_class >= EVERGREEN) rctx->max_db = 8; else @@ -295,7 +315,7 @@ bool r600_common_context_init(struct r600_common_context *rctx, if (!rctx->ctx) return false; - if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { + if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, rctx, NULL); @@ -373,6 +393,7 @@ static const struct debug_named_value common_debug_options[] = { { "noir", DBG_NO_IR, "Don't print the LLVM IR"}, { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"}, { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"}, + { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" }, /* features */ { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" }, @@ -389,6 +410,7 @@ static const struct debug_named_value common_debug_options[] = { { "nodcc", DBG_NO_DCC, "Disable DCC." }, { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, + { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." 
}, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -698,7 +720,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t *max_clock_frequency = ret; - *max_clock_frequency = rscreen->info.max_sclk; + *max_clock_frequency = rscreen->info.max_shader_clock; } return sizeof(uint32_t); @@ -734,7 +756,7 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen) struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / - rscreen->info.r600_clock_crystal_freq; + rscreen->info.clock_crystal_freq; } static void r600_fence_reference(struct pipe_screen *screen, @@ -778,116 +800,40 @@ static boolean r600_fence_finish(struct pipe_screen *screen, return rws->fence_wait(rws, rfence->gfx, timeout); } -static bool r600_interpret_tiling(struct r600_common_screen *rscreen, - uint32_t tiling_config) +static void r600_query_memory_info(struct pipe_screen *screen, + struct pipe_memory_info *info) { - switch ((tiling_config & 0xe) >> 1) { - case 0: - rscreen->tiling_info.num_channels = 1; - break; - case 1: - rscreen->tiling_info.num_channels = 2; - break; - case 2: - rscreen->tiling_info.num_channels = 4; - break; - case 3: - rscreen->tiling_info.num_channels = 8; - break; - default: - return false; - } - - switch ((tiling_config & 0x30) >> 4) { - case 0: - rscreen->tiling_info.num_banks = 4; - break; - case 1: - rscreen->tiling_info.num_banks = 8; - break; - default: - return false; - - } - switch ((tiling_config & 0xc0) >> 6) { - case 0: - rscreen->tiling_info.group_bytes = 256; - break; - case 1: - rscreen->tiling_info.group_bytes = 512; - break; - default: - return false; - } - return true; -} - -static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen, - uint32_t tiling_config) -{ - switch (tiling_config & 0xf) { - case 0: - rscreen->tiling_info.num_channels = 1; - break; - case 1: - 
rscreen->tiling_info.num_channels = 2; - break; - case 2: - rscreen->tiling_info.num_channels = 4; - break; - case 3: - rscreen->tiling_info.num_channels = 8; - break; - default: - return false; - } - - switch ((tiling_config & 0xf0) >> 4) { - case 0: - rscreen->tiling_info.num_banks = 4; - break; - case 1: - rscreen->tiling_info.num_banks = 8; - break; - case 2: - rscreen->tiling_info.num_banks = 16; - break; - default: - return false; - } - - switch ((tiling_config & 0xf00) >> 8) { - case 0: - rscreen->tiling_info.group_bytes = 256; - break; - case 1: - rscreen->tiling_info.group_bytes = 512; - break; - default: - return false; - } - return true; -} - -static bool r600_init_tiling(struct r600_common_screen *rscreen) -{ - uint32_t tiling_config = rscreen->info.r600_tiling_config; - - /* set default group bytes, overridden by tiling info ioctl */ - if (rscreen->chip_class <= R700) { - rscreen->tiling_info.group_bytes = 256; - } else { - rscreen->tiling_info.group_bytes = 512; - } - - if (!tiling_config) - return true; - - if (rscreen->chip_class <= R700) { - return r600_interpret_tiling(rscreen, tiling_config); - } else { - return evergreen_interpret_tiling(rscreen, tiling_config); - } + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_winsys *ws = rscreen->ws; + unsigned vram_usage, gtt_usage; + + info->total_device_memory = rscreen->info.vram_size / 1024; + info->total_staging_memory = rscreen->info.gart_size / 1024; + + /* The real TTM memory usage is somewhat random, because: + * + * 1) TTM delays freeing memory, because it can only free it after + * fences expire. + * + * 2) The memory usage can be really low if big VRAM evictions are + * taking place, but the real usage is well above the size of VRAM. + * + * Instead, return statistics of this process. 
+ */ + vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; + gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; + + info->avail_device_memory = + vram_usage <= info->total_device_memory ? + info->total_device_memory - vram_usage : 0; + info->avail_staging_memory = + gtt_usage <= info->total_staging_memory ? + info->total_staging_memory - gtt_usage : 0; + + info->device_memory_evicted = + ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; + /* Just return the number of evicted 64KB pages. */ + info->nr_device_memory_evictions = info->device_memory_evicted / 64; } struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, @@ -929,6 +875,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.fence_reference = r600_fence_reference; rscreen->b.resource_destroy = u_resource_destroy_vtbl; rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; + rscreen->b.query_memory_info = r600_query_memory_info; if (rscreen->info.has_uvd) { rscreen->b.get_video_param = rvid_get_video_param; @@ -946,9 +893,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); - if (!r600_init_tiling(rscreen)) { - return false; - } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); pipe_mutex_init(rscreen->gpu_load_mutex); @@ -968,27 +912,34 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, if (rscreen->debug_flags & DBG_INFO) { printf("pci_id = 0x%x\n", rscreen->info.pci_id); - printf("family = %i\n", rscreen->info.family); + printf("family = %i (%s)\n", rscreen->info.family, + r600_get_chip_name(rscreen)); printf("chip_class = %i\n", rscreen->info.chip_class); - printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20)); - printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20)); - printf("max_sclk = 
%i\n", rscreen->info.max_sclk); + printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); + printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); + printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); + printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); + printf("has_sdma = %i\n", rscreen->info.has_sdma); + printf("has_uvd = %i\n", rscreen->info.has_uvd); + printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); + printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); + printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); + printf("drm = %i.%i.%i\n", rscreen->info.drm_major, + rscreen->info.drm_minor, rscreen->info.drm_patchlevel); + printf("has_userptr = %i\n", rscreen->info.has_userptr); + + printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); + printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); printf("max_se = %i\n", rscreen->info.max_se); printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); - printf("drm = %i.%i.%i\n", rscreen->info.drm_major, - rscreen->info.drm_minor, rscreen->info.drm_patchlevel); - printf("has_uvd = %i\n", rscreen->info.has_uvd); - printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); - printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends); - printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq); - printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config); - printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes); - printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes); - printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address); - printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma); - printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map); - 
printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid); + + printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); + printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); + printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); + printf("num_render_backends = %i\n", rscreen->info.num_render_backends); + printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); + printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid); printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index d66e74f9254..e92df876c22 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -71,6 +71,7 @@ #define DBG_NO_IR (1 << 12) #define DBG_NO_TGSI (1 << 13) #define DBG_NO_ASM (1 << 14) +#define DBG_PREOPT_IR (1 << 15) /* Bits 21-31 are reserved for the r600g driver. */ /* features */ #define DBG_NO_ASYNC_DMA (1llu << 32) @@ -87,6 +88,7 @@ #define DBG_NO_DCC (1llu << 43) #define DBG_NO_DCC_CLEAR (1llu << 44) #define DBG_NO_RB_PLUS (1llu << 45) +#define DBG_SI_SCHED (1llu << 46) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -129,6 +131,9 @@ struct radeon_shader_binary { char *disasm_string; }; +void radeon_shader_binary_init(struct radeon_shader_binary *b); +void radeon_shader_binary_clean(struct radeon_shader_binary *b); + struct r600_resource { struct u_resource b; @@ -257,8 +262,6 @@ struct r600_surface { unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. 
*/ - unsigned sx_ps_downconvert; /* Stoney only */ - unsigned sx_blend_opt_epsilon; /* Stoney only */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ @@ -278,19 +281,12 @@ struct r600_surface { unsigned pa_su_poly_offset_db_fmt_cntl; }; -struct r600_tiling_info { - unsigned num_channels; - unsigned num_banks; - unsigned group_bytes; -}; - struct r600_common_screen { struct pipe_screen b; struct radeon_winsys *ws; enum radeon_family family; enum chip_class chip_class; struct radeon_info info; - struct r600_tiling_info tiling_info; uint64_t debug_flags; bool has_cp_dma; bool has_streamout; diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 0aa19cd54fe..f8b62411722 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -100,6 +100,12 @@ static boolean r600_query_sw_begin(struct r600_common_context *rctx, case R600_QUERY_NUM_SHADERS_CREATED: query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); break; + case R600_QUERY_GPIN_ASIC_ID: + case R600_QUERY_GPIN_NUM_SIMD: + case R600_QUERY_GPIN_NUM_RB: + case R600_QUERY_GPIN_NUM_SPI: + case R600_QUERY_GPIN_NUM_SE: + break; default: unreachable("r600_query_sw_begin: bad query type"); } @@ -146,6 +152,12 @@ static void r600_query_sw_end(struct r600_common_context *rctx, case R600_QUERY_NUM_SHADERS_CREATED: query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); break; + case R600_QUERY_GPIN_ASIC_ID: + case R600_QUERY_GPIN_NUM_SIMD: + case R600_QUERY_GPIN_NUM_RB: + case R600_QUERY_GPIN_NUM_SPI: + case R600_QUERY_GPIN_NUM_SE: + break; default: unreachable("r600_query_sw_end: bad query type"); } @@ -162,7 +174,7 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx, case PIPE_QUERY_TIMESTAMP_DISJOINT: /* Convert from cycles per millisecond to cycles per second 
(Hz). */ result->timestamp_disjoint.frequency = - (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000; + (uint64_t)rctx->screen->info.clock_crystal_freq * 1000; result->timestamp_disjoint.disjoint = FALSE; return TRUE; case PIPE_QUERY_GPU_FINISHED: { @@ -171,6 +183,22 @@ static boolean r600_query_sw_get_result(struct r600_common_context *rctx, wait ? PIPE_TIMEOUT_INFINITE : 0); return result->b; } + + case R600_QUERY_GPIN_ASIC_ID: + result->u32 = 0; + return TRUE; + case R600_QUERY_GPIN_NUM_SIMD: + result->u32 = rctx->screen->info.num_good_compute_units; + return TRUE; + case R600_QUERY_GPIN_NUM_RB: + result->u32 = rctx->screen->info.num_render_backends; + return TRUE; + case R600_QUERY_GPIN_NUM_SPI: + result->u32 = 1; /* all supported chips have one SPI per SE */ + return TRUE; + case R600_QUERY_GPIN_NUM_SE: + result->u32 = rctx->screen->info.max_se; + return TRUE; } result->u64 = query->end_result - query->begin_result; @@ -908,7 +936,7 @@ boolean r600_query_hw_get_result(struct r600_common_context *rctx, /* Convert the time to expected units. 
*/ if (rquery->type == PIPE_QUERY_TIME_ELAPSED || rquery->type == PIPE_QUERY_TIMESTAMP) { - result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq; + result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq; } return TRUE; } @@ -1021,13 +1049,13 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx) struct radeon_winsys_cs *cs = ctx->gfx.cs; struct r600_resource *buffer; uint32_t *results; - unsigned num_backends = ctx->screen->info.r600_num_backends; + unsigned num_backends = ctx->screen->info.num_render_backends; unsigned i, mask = 0; /* if backend_map query is supported by the kernel */ - if (ctx->screen->info.r600_backend_map_valid) { - unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes; - unsigned backend_map = ctx->screen->info.r600_backend_map; + if (ctx->screen->info.r600_gb_backend_map_valid) { + unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes; + unsigned backend_map = ctx->screen->info.r600_gb_backend_map; unsigned item_width, item_mask; if (ctx->chip_class >= EVERGREEN) { @@ -1096,15 +1124,21 @@ err: return; } -#define X(name_, query_type_, type_, result_type_) \ +#define XFULL(name_, query_type_, type_, result_type_, group_id_) \ { \ .name = name_, \ .query_type = R600_QUERY_##query_type_, \ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ - .group_id = ~(unsigned)0 \ + .group_id = group_id_ \ } +#define X(name_, query_type_, type_, result_type_) \ + XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0) + +#define XG(group_, name_, query_type_, type_, result_type_) \ + XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_) + static struct pipe_driver_query_info r600_driver_query_list[] = { X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE), X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE), @@ -1116,6 +1150,20 @@ static struct pipe_driver_query_info 
r600_driver_query_list[] = { X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), + + /* GPIN queries are for the benefit of old versions of GPUPerfStudio, + * which use it as a fallback path to detect the GPU type. + * + * Note: The names of these queries are significant for GPUPerfStudio + * (and possibly their order as well). */ + XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE), + XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE), + XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE), + XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE), + XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE), + + /* The following queries must be at the end of the list because their + * availability is adjusted dynamically based on the DRM version. */ X("GPU-load", GPU_LOAD, UINT64, AVERAGE), X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE), @@ -1123,6 +1171,8 @@ static struct pipe_driver_query_info r600_driver_query_list[] = { }; #undef X +#undef XG +#undef XFULL static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) { @@ -1167,16 +1217,40 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, break; } + if (info->group_id != ~(unsigned)0 && rscreen->perfcounters) + info->group_id += rscreen->perfcounters->num_groups; + return 1; } +/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware + * performance counter groups, so be careful when changing this and related + * functions. 
+ */ static int r600_get_driver_query_group_info(struct pipe_screen *screen, unsigned index, struct pipe_driver_query_group_info *info) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + unsigned num_pc_groups = 0; - return r600_get_perfcounter_group_info(rscreen, index, info); + if (rscreen->perfcounters) + num_pc_groups = rscreen->perfcounters->num_groups; + + if (!info) + return num_pc_groups + R600_NUM_SW_QUERY_GROUPS; + + if (index < num_pc_groups) + return r600_get_perfcounter_group_info(rscreen, index, info); + + index -= num_pc_groups; + if (index >= R600_NUM_SW_QUERY_GROUPS) + return 0; + + info->name = "GPIN"; + info->max_active_queries = 5; + info->num_queries = 5; + return 1; } void r600_query_init(struct r600_common_context *rctx) @@ -1189,7 +1263,7 @@ void r600_query_init(struct r600_common_context *rctx) rctx->b.get_query_result = r600_get_query_result; rctx->render_cond_atom.emit = r600_emit_query_predication; - if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0) + if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0) rctx->b.render_condition = r600_render_condition; LIST_INITHEAD(&rctx->active_nontimer_queries); diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index e5a98bfe5bd..8b2c4e3fe93 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -54,8 +54,18 @@ struct r600_resource; #define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) #define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12) #define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13) +#define R600_QUERY_GPIN_ASIC_ID (PIPE_QUERY_DRIVER_SPECIFIC + 14) +#define R600_QUERY_GPIN_NUM_SIMD (PIPE_QUERY_DRIVER_SPECIFIC + 15) +#define R600_QUERY_GPIN_NUM_RB (PIPE_QUERY_DRIVER_SPECIFIC + 16) +#define R600_QUERY_GPIN_NUM_SPI (PIPE_QUERY_DRIVER_SPECIFIC + 17) +#define R600_QUERY_GPIN_NUM_SE 
(PIPE_QUERY_DRIVER_SPECIFIC + 18) #define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100) +enum { + R600_QUERY_GROUP_GPIN = 0, + R600_NUM_SW_QUERY_GROUPS +}; + struct r600_query_ops { void (*destroy)(struct r600_common_context *, struct r600_query *); boolean (*begin)(struct r600_common_context *, struct r600_query *); @@ -156,24 +166,6 @@ enum { R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), }; -/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */ -enum { - R600_PC_SHADER_PS = (1 << 0), - R600_PC_SHADER_VS = (1 << 1), - R600_PC_SHADER_GS = (1 << 2), - R600_PC_SHADER_ES = (1 << 3), - R600_PC_SHADER_HS = (1 << 4), - R600_PC_SHADER_LS = (1 << 5), - R600_PC_SHADER_CS = (1 << 6), - - R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS | - R600_PC_SHADER_GS | R600_PC_SHADER_ES | - R600_PC_SHADER_HS | R600_PC_SHADER_LS | - R600_PC_SHADER_CS, - - R600_PC_SHADER_WINDOWING = (1 << 31), -}; - /* Describes a hardware block with performance counters. Multiple instances of * each block, possibly per-SE, may exist on the chip. 
Depending on the block * and on the user's configuration, we either @@ -210,6 +202,10 @@ struct r600_perfcounters { unsigned num_instance_cs_dwords; unsigned num_shaders_cs_dwords; + unsigned num_shader_types; + const char * const *shader_type_suffixes; + const unsigned *shader_type_bits; + void (*get_size)(struct r600_perfcounter_block *, unsigned count, unsigned *selectors, unsigned *num_select_dw, unsigned *num_read_dw); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 7c4717d29fa..af206e43860 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -361,8 +361,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height; unsigned element_bits = 4; unsigned cmask_cache_bits = 1024; - unsigned num_pipes = rscreen->tiling_info.num_channels; - unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes; + unsigned num_pipes = rscreen->info.num_tile_pipes; + unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes; unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements; @@ -394,8 +394,8 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen, struct r600_texture *rtex, struct r600_cmask_info *out) { - unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes; - unsigned num_pipes = rscreen->tiling_info.num_channels; + unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; + unsigned num_pipes = rscreen->info.num_tile_pipes; unsigned cl_width, cl_height; switch (num_pipes) { @@ -515,7 +515,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, { unsigned cl_width, cl_height, width, height; unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; - unsigned num_pipes = 
rscreen->tiling_info.num_channels; + unsigned num_pipes = rscreen->info.num_tile_pipes; if (rscreen->chip_class <= EVERGREEN && rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26) @@ -533,6 +533,10 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) return 0; + /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */ + if (rscreen->family == CHIP_STONEY) + num_pipes = 4; + switch (num_pipes) { case 1: cl_width = 32; @@ -565,7 +569,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, slice_elements = (width * height) / (8 * 8); slice_bytes = slice_elements * 4; - pipe_interleave_bytes = rscreen->tiling_info.group_bytes; + pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; base_align = num_pipes * pipe_interleave_bytes; rtex->htile.pitch = width; @@ -1212,10 +1216,30 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, const struct pipe_surface *templ) { unsigned level = templ->u.tex.level; + unsigned width = u_minify(tex->width0, level); + unsigned height = u_minify(tex->height0, level); + + if (tex->target != PIPE_BUFFER && templ->format != tex->format) { + const struct util_format_description *tex_desc + = util_format_description(tex->format); + const struct util_format_description *templ_desc + = util_format_description(templ->format); + + assert(tex_desc->block.bits == templ_desc->block.bits); + + /* Adjust size of surface if and only if the block width or + * height is changed. 
*/ + if (tex_desc->block.width != templ_desc->block.width || + tex_desc->block.height != templ_desc->block.height) { + unsigned nblks_x = util_format_get_nblocksx(tex->format, width); + unsigned nblks_y = util_format_get_nblocksy(tex->format, height); + + width = nblks_x * templ_desc->block.width; + height = nblks_y * templ_desc->block.height; + } + } - return r600_create_surface_custom(pipe, tex, templ, - u_minify(tex->width0, level), - u_minify(tex->height0, level)); + return r600_create_surface_custom(pipe, tex, templ, width, height); } static void r600_surface_destroy(struct pipe_context *pipe, @@ -1388,7 +1412,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, return; for (i = 0; i < fb->nr_cbufs; i++) { - struct r600_surface *surf; struct r600_texture *tex; unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; @@ -1399,7 +1422,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, if (!(*buffers & clear_bit)) continue; - surf = (struct r600_surface *)fb->cbufs[i]; tex = (struct r600_texture *)fb->cbufs[i]->texture; /* 128-bit formats are unusupported */ @@ -1446,8 +1468,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, if (clear_words_needed) tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; } else { - /* RB+ doesn't work with CMASK fast clear. */ - if (surf->sx_ps_downconvert) + /* Stoney/RB+ doesn't work with CMASK fast clear. 
*/ + if (rctx->family == CHIP_STONEY) continue; /* ensure CMASK is enabled */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 76be37625f3..f5e3f6af1a0 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1452,6 +1452,74 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action, emit_data->args[1], ""); } +static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); +} + +static void emit_pk2h(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMTypeRef fp16, i16; + LLVMValueRef const16, comp[2]; + unsigned i; + + fp16 = LLVMHalfTypeInContext(context); + i16 = LLVMInt16TypeInContext(context); + const16 = lp_build_const_int32(uint_bld->gallivm, 16); + + for (i = 0; i < 2; i++) { + comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, ""); + comp[i] = LLVMBuildBitCast(builder, comp[i], i16, ""); + comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, ""); + } + + comp[1] = LLVMBuildShl(builder, comp[1], const16, ""); + comp[0] = LLVMBuildOr(builder, comp[0], comp[1], ""); + + emit_data->output[emit_data->chan] = comp[0]; +} + +static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); +} + +static void emit_up2h(const struct lp_build_tgsi_action *action, + struct 
lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMTypeRef fp16, i16; + LLVMValueRef const16, input, val; + unsigned i; + + fp16 = LLVMHalfTypeInContext(context); + i16 = LLVMInt16TypeInContext(context); + const16 = lp_build_const_int32(uint_bld->gallivm, 16); + input = emit_data->args[0]; + + for (i = 0; i < 2; i++) { + val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input; + val = LLVMBuildTrunc(builder, val, i16, ""); + val = LLVMBuildBitCast(builder, val, fp16, ""); + emit_data->output[i] = + LLVMBuildFPExt(builder, val, bld_base->base.elem_type, ""); + } +} + void radeon_llvm_context_init(struct radeon_llvm_context * ctx) { struct lp_type type; @@ -1581,6 +1649,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb; bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; + bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args; + bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h; bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32"; bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; @@ -1618,6 +1688,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; + bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args; + bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h; } void radeon_llvm_create_func(struct radeon_llvm_context * ctx, @@ -1638,11 +1710,9 @@ void radeon_llvm_create_func(struct 
radeon_llvm_context * ctx, void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) { struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm; - /* End the main function with Return*/ - LLVMBuildRetVoid(gallivm->builder); /* Create the pass manager */ - ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule( + gallivm->passmgr = LLVMCreateFunctionPassManagerForModule( gallivm->module); /* This pass should eliminate all the load and store instructions */ diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 2e5caa67d10..7329ceedf04 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -245,46 +245,49 @@ struct radeon_winsys_cs { }; struct radeon_info { + /* Device info. */ uint32_t pci_id; enum radeon_family family; enum chip_class chip_class; uint64_t gart_size; uint64_t vram_size; - uint32_t max_sclk; - uint32_t num_good_compute_units; - uint32_t max_se; - uint32_t max_sh_per_se; + boolean has_virtual_memory; + bool gfx_ib_pad_with_type2; + boolean has_sdma; + boolean has_uvd; + uint32_t vce_fw_version; + uint32_t vce_harvest_config; + uint32_t clock_crystal_freq; + /* Kernel info. */ uint32_t drm_major; /* version */ uint32_t drm_minor; uint32_t drm_patchlevel; - - boolean has_uvd; - uint32_t vce_fw_version; boolean has_userptr; - bool gfx_ib_pad_with_type2; + /* Shader cores. */ + uint32_t r600_max_quad_pipes; /* wave size / 16 */ + uint32_t max_shader_clock; + uint32_t num_good_compute_units; + uint32_t max_se; /* shader engines */ + uint32_t max_sh_per_se; /* shader arrays per shader engine */ + + /* Render backends (color + depth blocks). 
*/ uint32_t r300_num_gb_pipes; uint32_t r300_num_z_pipes; - - uint32_t r600_num_backends; - uint32_t r600_clock_crystal_freq; - uint32_t r600_tiling_config; - uint32_t r600_num_tile_pipes; - uint32_t r600_max_pipes; - boolean r600_virtual_address; - boolean r600_has_dma; - - uint32_t r600_backend_map; - boolean r600_backend_map_valid; - + uint32_t r600_gb_backend_map; /* R600 harvest config */ + boolean r600_gb_backend_map_valid; + uint32_t r600_num_banks; + uint32_t num_render_backends; + uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */ + uint32_t pipe_interleave_bytes; + uint32_t enabled_rb_mask; /* GCN harvest config */ + + /* Tile modes. */ boolean si_tile_mode_array_valid; uint32_t si_tile_mode_array[32]; - uint32_t si_backend_enabled_mask; - boolean cik_macrotile_mode_array_valid; uint32_t cik_macrotile_mode_array[16]; - uint32_t vce_harvest_config; }; enum radeon_feature_id { diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 105a1b2a878..76913914b38 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -308,7 +308,7 @@ void cik_sdma_copy(struct pipe_context *ctx, } mtilew = (8 * rsrc->surface.bankw * - sctx->screen->b.tiling_info.num_channels) * + sctx->screen->b.info.num_tile_pipes) * rsrc->surface.mtilea; assert(!(mtilew & (mtilew - 1))); mtileh = (8 * rsrc->surface.bankh * num_banks) / diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 6ef6eeec178..825fbb181ba 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -461,9 +461,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ LLVMContextDispose(program->llvm_ctx); } #else - FREE(program->shader.binary.config); - FREE(program->shader.binary.rodata); - FREE(program->shader.binary.global_symbol_offsets); si_shader_destroy(&program->shader); #endif diff --git 
a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index baa02293c41..d60c4515625 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -177,7 +177,7 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); - si_mark_atom_dirty(ctx, &ctx->cb_target_mask); + si_mark_atom_dirty(ctx, &ctx->cb_render_state); si_mark_atom_dirty(ctx, &ctx->blend_color.atom); si_mark_atom_dirty(ctx, &ctx->db_render_state); si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 7ee1daee7bf..24855e4e6f2 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -56,6 +56,8 @@ enum si_pc_reg_layout { /* Registers are laid out in decreasing rather than increasing order. */ SI_PC_REG_REVERSE = 4, + + SI_PC_FAKE = 8, }; struct si_pc_block_base { @@ -79,6 +81,23 @@ struct si_pc_block { unsigned instances; }; +/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of + * performance counter group IDs. 
+ */ +static const char * const si_pc_shader_type_suffixes[] = { + "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS" +}; + +static const unsigned si_pc_shader_type_bits[] = { + 0x7f, + S_036780_ES_EN(1), + S_036780_GS_EN(1), + S_036780_VS_EN(1), + S_036780_PS_EN(1), + S_036780_LS_EN(1), + S_036780_HS_EN(1), + S_036780_CS_EN(1), +}; static struct si_pc_block_base cik_CB = { .name = "CB", @@ -308,56 +327,80 @@ static struct si_pc_block_base cik_WD = { .counter0_lo = R_034200_WD_PERFCOUNTER0_LO, }; +static struct si_pc_block_base cik_MC = { + .name = "MC", + .num_counters = 4, + + .layout = SI_PC_FAKE, +}; + +static struct si_pc_block_base cik_SRBM = { + .name = "SRBM", + .num_counters = 2, + + .layout = SI_PC_FAKE, +}; + /* Both the number of instances and selectors varies between chips of the same * class. We only differentiate by class here and simply expose the maximum * number over all chips in a class. + * + * Unfortunately, GPUPerfStudio uses the order of performance counter groups + * blindly once it believes it has identified the hardware, so the order of + * blocks here matters. 
*/ static struct si_pc_block groups_CIK[] = { { &cik_CB, 226, 4 }, - { &cik_CPC, 22 }, { &cik_CPF, 17 }, - { &cik_CPG, 46 }, { &cik_DB, 257, 4 }, - { &cik_GDS, 121 }, { &cik_GRBM, 34 }, { &cik_GRBMSE, 15 }, - { &cik_IA, 22 }, - { &cik_PA_SC, 395 }, { &cik_PA_SU, 153 }, + { &cik_PA_SC, 395 }, { &cik_SPI, 186 }, { &cik_SQ, 252 }, { &cik_SX, 32 }, { &cik_TA, 111, 11 }, { &cik_TCA, 39, 2 }, { &cik_TCC, 160, 16 }, - { &cik_TCP, 154, 11 }, { &cik_TD, 55, 11 }, + { &cik_TCP, 154, 11 }, + { &cik_GDS, 121 }, { &cik_VGT, 140 }, + { &cik_IA, 22 }, + { &cik_MC, 22 }, + { &cik_SRBM, 19 }, { &cik_WD, 22 }, + { &cik_CPG, 46 }, + { &cik_CPC, 22 }, + }; static struct si_pc_block groups_VI[] = { { &cik_CB, 396, 4 }, - { &cik_CPC, 24 }, { &cik_CPF, 19 }, - { &cik_CPG, 48 }, { &cik_DB, 257, 4 }, - { &cik_GDS, 121 }, { &cik_GRBM, 34 }, { &cik_GRBMSE, 15 }, - { &cik_IA, 24 }, - { &cik_PA_SC, 397 }, { &cik_PA_SU, 153 }, + { &cik_PA_SC, 397 }, { &cik_SPI, 197 }, { &cik_SQ, 273 }, { &cik_SX, 34 }, { &cik_TA, 119, 16 }, { &cik_TCA, 35, 2 }, { &cik_TCC, 192, 16 }, - { &cik_TCP, 180, 16 }, { &cik_TD, 55, 16 }, + { &cik_TCP, 180, 16 }, + { &cik_GDS, 121 }, { &cik_VGT, 147 }, + { &cik_IA, 24 }, + { &cik_MC, 22 }, + { &cik_SRBM, 27 }, { &cik_WD, 37 }, + { &cik_CPG, 48 }, + { &cik_CPC, 24 }, + }; static void si_pc_get_size(struct r600_perfcounter_block *group, @@ -368,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block *group, struct si_pc_block_base *regs = sigroup->b; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; - if (layout_multi == SI_PC_MULTI_BLOCK) { + if (regs->layout & SI_PC_FAKE) { + *num_select_dw = 0; + } else if (layout_multi == SI_PC_MULTI_BLOCK) { if (count < regs->num_multi) *num_select_dw = 2 * (count + 2) + regs->num_prelude; else @@ -431,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context *ctx, assert(count <= regs->num_counters); + if (regs->layout & SI_PC_FAKE) + return; + if (layout_multi == SI_PC_MULTI_BLOCK) { 
assert(!(regs->layout & SI_PC_REG_REVERSE)); @@ -590,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context *ctx, unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; - if (regs->layout & SI_PC_REG_REVERSE) - reg_delta = -reg_delta; + if (!(regs->layout & SI_PC_FAKE)) { + if (regs->layout & SI_PC_REG_REVERSE) + reg_delta = -reg_delta; - for (idx = 0; idx < count; ++idx) { - if (regs->counters) - reg = regs->counters[idx]; - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - va += 4; - reg += reg_delta; + for (idx = 0; idx < count; ++idx) { + if (regs->counters) + reg = regs->counters[idx]; + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + reg += reg_delta; + } + } else { + for (idx = 0; idx < count; ++idx) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, 0); /* immediate */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + } } } @@ -656,6 +717,10 @@ void si_init_perfcounters(struct si_screen *screen) pc->num_stop_cs_dwords += 6; } + pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); + pc->shader_type_suffixes = si_pc_shader_type_suffixes; + pc->shader_type_bits = si_pc_shader_type_bits; + pc->get_size = si_pc_get_size; pc->emit_instance = si_pc_emit_instance; pc->emit_shaders = si_pc_emit_shaders; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 0c1ae90f9da..61ce976c32c 100644 --- 
a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -215,7 +215,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), - "+DumpCode,+vgpr-spilling", +#if HAVE_LLVM >= 0x0308 + sscreen->b.debug_flags & DBG_SI_SCHED ? + "+DumpCode,+vgpr-spilling,+si-scheduler" : +#endif + "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); @@ -304,6 +308,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -329,12 +335,18 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return 4; + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + return HAVE_LLVM >= 0x0306; + case PIPE_CAP_GLSL_FEATURE_LEVEL: return HAVE_LLVM >= 0x0307 ? 410 : 330; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF); + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return 0; + /* Unsupported features. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: @@ -344,12 +356,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_DRAW_PARAMETERS: - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: @@ -399,7 +411,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Timer queries, present when the clock frequency is non zero. */ case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: - return sscreen->b.info.r600_clock_crystal_freq != 0; + return sscreen->b.info.clock_crystal_freq != 0; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -541,57 +553,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen) r600_destroy_common_screen(&sscreen->b); } -#define SI_TILE_MODE_COLOR_2D_8BPP 14 - -/* Initialize pipe config. This is especially important for GPUs - * with 16 pipes and more where it's initialized incorrectly by - * the TILING_CONFIG ioctl. */ -static bool si_initialize_pipe_config(struct si_screen *sscreen) -{ - unsigned mode2d; - - /* This is okay, because there can be no 2D tiling without - * the tile mode array, so we won't need the pipe config. - * Return "success". - */ - if (!sscreen->b.info.si_tile_mode_array_valid) - return true; - - /* The same index is used for the 2D mode on CIK too. 
*/ - mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP]; - - switch (G_009910_PIPE_CONFIG(mode2d)) { - case V_02803C_ADDR_SURF_P2: - sscreen->b.tiling_info.num_channels = 2; - break; - case V_02803C_X_ADDR_SURF_P4_8X16: - case V_02803C_X_ADDR_SURF_P4_16X16: - case V_02803C_X_ADDR_SURF_P4_16X32: - case V_02803C_X_ADDR_SURF_P4_32X32: - sscreen->b.tiling_info.num_channels = 4; - break; - case V_02803C_X_ADDR_SURF_P8_16X16_8X16: - case V_02803C_X_ADDR_SURF_P8_16X32_8X16: - case V_02803C_X_ADDR_SURF_P8_32X32_8X16: - case V_02803C_X_ADDR_SURF_P8_16X32_16X16: - case V_02803C_X_ADDR_SURF_P8_32X32_16X16: - case V_02803C_X_ADDR_SURF_P8_32X32_16X32: - case V_02803C_X_ADDR_SURF_P8_32X64_32X32: - sscreen->b.tiling_info.num_channels = 8; - break; - case V_02803C_X_ADDR_SURF_P16_32X32_8X16: - case V_02803C_X_ADDR_SURF_P16_32X32_16X16: - sscreen->b.tiling_info.num_channels = 16; - break; - default: - assert(0); - fprintf(stderr, "radeonsi: Unknown pipe config %i.\n", - G_009910_PIPE_CONFIG(mode2d)); - return false; - } - return true; -} - static bool si_init_gs_info(struct si_screen *sscreen) { switch (sscreen->b.family) { @@ -636,7 +597,6 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_initialize_pipe_config(sscreen) || !si_init_gs_info(sscreen)) { FREE(sscreen); return NULL; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e2725fe3679..48947442757 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -193,7 +193,7 @@ struct si_context { struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; - struct r600_atom cb_target_mask; + struct r600_atom cb_render_state; struct si_blend_color blend_color; struct r600_atom clip_regs; struct si_clip_state clip_state; diff --git 
a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 94c1129c88d..d9ed6b234e0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4074,7 +4074,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, si_shader_dump_disassembly(&shader->binary, debug); si_shader_dump_stats(sscreen, &shader->config, - shader->selector->info.num_inputs, + shader->selector ? shader->selector->info.num_inputs : 0, shader->binary.code_size, debug, processor); } @@ -4092,7 +4092,7 @@ int si_compile_llvm(struct si_screen *sscreen, if (r600_can_dump_shader(&sscreen->b, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); - if (!(sscreen->b.debug_flags & DBG_NO_IR)) + if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) LLVMDumpModule(mod); } @@ -4177,6 +4177,13 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); + LLVMBuildRetVoid(bld_base->base.gallivm->builder); + + /* Dump LLVM IR before any optimization passes */ + if (sscreen->b.debug_flags & DBG_PREOPT_IR && + r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY)) + LLVMDumpModule(bld_base->base.gallivm->module); + radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld); if (dump) @@ -4383,9 +4390,16 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } + LLVMBuildRetVoid(bld_base->base.gallivm->builder); + mod = bld_base->base.gallivm->module; + + /* Dump LLVM IR before any optimization passes */ + if (sscreen->b.debug_flags & DBG_PREOPT_IR && + r600_can_dump_shader(&sscreen->b, si_shader_ctx.type)) + LLVMDumpModule(mod); + radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); - mod = bld_base->base.gallivm->module; r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, mod, debug, si_shader_ctx.type); if (r) { @@ -4423,14 +4437,6 @@ out: return r; } -void 
si_shader_destroy_binary(struct radeon_shader_binary *binary) -{ - FREE(binary->code); - FREE(binary->rodata); - FREE(binary->relocs); - FREE(binary->disasm_string); -} - void si_shader_destroy(struct si_shader *shader) { if (shader->gs_copy_shader) { @@ -4442,5 +4448,6 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(&shader->scratch_bo, NULL); r600_resource_reference(&shader->bo, NULL); - si_shader_destroy_binary(&shader->binary); + + radeon_shader_binary_clean(&shader->binary); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index c1512078a18..98bdb890a45 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -345,7 +345,6 @@ int si_compile_llvm(struct si_screen *sscreen, struct pipe_debug_callback *debug, unsigned processor); void si_shader_destroy(struct si_shader *shader); -void si_shader_destroy_binary(struct radeon_shader_binary *binary); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 9e0ccfc5dde..bf780777b50 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -97,7 +97,7 @@ uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) } /* The old way. */ - switch (sscreen->b.tiling_info.num_banks) { + switch (sscreen->b.info.r600_num_banks) { case 2: return V_02803C_ADDR_SURF_2_BANK; case 4: @@ -189,14 +189,14 @@ unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) /* This is probably broken for a lot of chips, but it's only used * if the kernel cannot return the tile mode array for CIK. 
*/ - switch (sscreen->b.info.r600_num_tile_pipes) { + switch (sscreen->b.info.num_tile_pipes) { case 16: return V_02803C_X_ADDR_SURF_P16_32X32_16X16; case 8: return V_02803C_X_ADDR_SURF_P8_32X32_16X16; case 4: default: - if (sscreen->b.info.r600_num_backends == 4) + if (sscreen->b.info.num_render_backends == 4) return V_02803C_X_ADDR_SURF_P4_16X16; else return V_02803C_X_ADDR_SURF_P4_8X16; @@ -238,7 +238,8 @@ static unsigned si_pack_float_12p4(float x) /* * Inferred framebuffer and blender state. * - * One of the reasons this must be derived from the framebuffer state is that: + * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state + * is that: * - The blend state mask is 0xf most of the time. * - The COLOR1 format isn't INVALID because of possible dual-source blending, * so COLOR1 is enabled pretty much all the time. @@ -246,18 +247,18 @@ static unsigned si_pack_float_12p4(float x) * * Another reason is to avoid a hang with dual source blending. */ -static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom) +static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct si_state_blend *blend = sctx->queued.named.blend; - uint32_t mask = 0, i; + uint32_t cb_target_mask = 0, i; for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) if (sctx->framebuffer.state.cbufs[i]) - mask |= 0xf << (4*i); + cb_target_mask |= 0xf << (4*i); if (blend) - mask &= blend->cb_target_mask; + cb_target_mask &= blend->cb_target_mask; /* Avoid a hang that happens when dual source blending is enabled * but there is not enough color outputs. 
This is undefined behavior, @@ -268,9 +269,146 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at if (blend && blend->dual_src_blend && sctx->ps_shader.cso && (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) - mask = 0; + cb_target_mask = 0; - radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask); + radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); + + /* STONEY-specific register settings. */ + if (sctx->b.family == CHIP_STONEY) { + unsigned spi_shader_col_format = + sctx->ps_shader.cso ? + sctx->ps_shader.current->key.ps.spi_shader_col_format : 0; + unsigned sx_ps_downconvert = 0; + unsigned sx_blend_opt_epsilon = 0; + unsigned sx_blend_opt_control = 0; + + for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { + struct r600_surface *surf = + (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; + unsigned format, swap, spi_format, colormask; + bool has_alpha, has_rgb; + + if (!surf) + continue; + + format = G_028C70_FORMAT(surf->cb_color_info); + swap = G_028C70_COMP_SWAP(surf->cb_color_info); + spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; + colormask = (cb_target_mask >> (i * 4)) & 0xf; + + /* Set if RGB and A are present. */ + has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); + + if (format == V_028C70_COLOR_8 || + format == V_028C70_COLOR_16 || + format == V_028C70_COLOR_32) + has_rgb = !has_alpha; + else + has_rgb = true; + + /* Check the colormask and export format. */ + if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) + has_rgb = false; + if (!(colormask & PIPE_MASK_A)) + has_alpha = false; + + if (spi_format == V_028714_SPI_SHADER_ZERO) { + has_rgb = false; + has_alpha = false; + } + + /* Disable value checking for disabled channels. 
*/ + if (!has_rgb) + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); + if (!has_alpha) + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); + + /* Enable down-conversion for 32bpp and smaller formats. */ + switch (format) { + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + /* For 1 and 2-channel formats, use the superset thereof. */ + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); + sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_5_6_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_1_5_5_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_4_4_4_4: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); + sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_32: + if (swap == V_0280A0_SWAP_STD && + spi_format == V_028714_SPI_SHADER_32_R) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); + else if (swap == V_0280A0_SWAP_ALT_REV && + spi_format == V_028714_SPI_SHADER_32_AR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); + break; + + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + /* For 1-channel formats, use the superset thereof. 
*/ + if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + if (swap == V_0280A0_SWAP_STD || + swap == V_0280A0_SWAP_STD_REV) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); + else + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); + } + break; + + case V_028C70_COLOR_10_11_11: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); + sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_2_10_10_10: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); + sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); + } + break; + } + } + + if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { + sx_ps_downconvert = 0; + sx_blend_opt_epsilon = 0; + sx_blend_opt_control = 0; + } + + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ + radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ + radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ + } } /* @@ -390,6 +528,36 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) } } +/** + * Get rid of DST in the blend factors by commuting the operands: + * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) + */ +static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, + unsigned *dst_factor, unsigned expected_dst, + unsigned replacement_src) +{ + if (*src_factor == expected_dst && + *dst_factor == PIPE_BLENDFACTOR_ZERO) { + *src_factor = PIPE_BLENDFACTOR_ZERO; + *dst_factor = replacement_src; + + /* Commuting the operands requires reversing subtractions. 
*/ + if (*func == PIPE_BLEND_SUBTRACT) + *func = PIPE_BLEND_REVERSE_SUBTRACT; + else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) + *func = PIPE_BLEND_SUBTRACT; + } +} + +static bool si_blend_factor_uses_dst(unsigned factor) +{ + return factor == PIPE_BLENDFACTOR_DST_COLOR || + factor == PIPE_BLENDFACTOR_DST_ALPHA || + factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || + factor == PIPE_BLENDFACTOR_INV_DST_COLOR; +} + static void *si_create_blend_state_mode(struct pipe_context *ctx, const struct pipe_blend_state *state, unsigned mode) @@ -397,7 +565,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, struct si_context *sctx = (struct si_context*)ctx; struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); struct si_pm4_state *pm4 = &blend->pm4; - + uint32_t sx_mrt_blend_opt[8] = {0}; uint32_t color_control = 0; if (!blend) @@ -435,12 +603,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, unsigned srcA = state->rt[j].alpha_src_factor; unsigned dstA = state->rt[j].alpha_dst_factor; + unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; unsigned blend_cntl = 0; + sx_mrt_blend_opt[i] = + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); + if (!state->rt[j].colormask) continue; - /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ + /* cb_render_state will disable unused ones */ blend->cb_target_mask |= state->rt[j].colormask << (4 * i); if (!state->rt[j].blend_enable) { @@ -448,6 +621,50 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, continue; } + /* Blending optimizations for Stoney. + * These transformations don't change the behavior. 
+ * + * First, get rid of DST in the blend factors: + * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) + */ + si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, + PIPE_BLENDFACTOR_DST_COLOR, + PIPE_BLENDFACTOR_SRC_COLOR); + si_blend_remove_dst(&eqA, &srcA, &dstA, + PIPE_BLENDFACTOR_DST_COLOR, + PIPE_BLENDFACTOR_SRC_COLOR); + si_blend_remove_dst(&eqA, &srcA, &dstA, + PIPE_BLENDFACTOR_DST_ALPHA, + PIPE_BLENDFACTOR_SRC_ALPHA); + + /* Look up the ideal settings from tables. */ + srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); + dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); + srcA_opt = si_translate_blend_opt_factor(srcA, true); + dstA_opt = si_translate_blend_opt_factor(dstA, true); + + /* Handle interdependencies. */ + if (si_blend_factor_uses_dst(srcRGB)) + dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; + if (si_blend_factor_uses_dst(srcA)) + dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; + + if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && + (dstRGB == PIPE_BLENDFACTOR_ZERO || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) + dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; + + /* Set the final value. */ + sx_mrt_blend_opt[i] = + S_028760_COLOR_SRC_OPT(srcRGB_opt) | + S_028760_COLOR_DST_OPT(dstRGB_opt) | + S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | + S_028760_ALPHA_SRC_OPT(srcA_opt) | + S_028760_ALPHA_DST_OPT(dstA_opt) | + S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); + + /* Set blend state. */ blend_cntl |= S_028780_ENABLE(1); blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); @@ -480,41 +697,13 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } if (sctx->b.family == CHIP_STONEY) { - uint32_t sx_blend_opt_control = 0; - - for (int i = 0; i < 8; i++) { - const int j = state->independent_blend_enable ? 
i : 0; - - /* TODO: We can also set this if the surface doesn't contain RGB. */ - if (!state->rt[j].blend_enable || - !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B))) - sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i); - - /* TODO: We can also set this if the surface doesn't contain alpha. */ - if (!state->rt[j].blend_enable || - !(state->rt[j].colormask & PIPE_MASK_A)) - sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i); - - if (!state->rt[j].blend_enable) { - si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, - S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | - S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED)); - continue; - } - + for (int i = 0; i < 8; i++) si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, - S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) | - S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) | - S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) | - S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) | - S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) | - S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func))); - } - - si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control); + sx_mrt_blend_opt[i]); - /* RB+ doesn't work with dual source blending */ - if (blend->dual_src_blend) + /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/ + if (blend->dual_src_blend || state->logicop_enable || + mode == V_028808_CB_RESOLVE) color_control |= S_028808_DISABLE_DUAL_QUAD(1); } @@ -532,7 +721,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); - si_mark_atom_dirty(sctx, &sctx->cb_target_mask); + si_mark_atom_dirty(sctx, &sctx->cb_render_state); } static void si_delete_blend_state(struct pipe_context *ctx, void *state) @@ -2097,8 +2286,10 @@ static void si_initialize_color_surface(struct si_context *sctx, color_pitch = S_028C64_TILE_MAX(pitch); + /* Intensity is implemented as Red, so treat it that way. */ color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | - S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); + S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 || + util_format_is_intensity(surf->base.format)); if (rtex->resource.b.b.nr_samples > 1) { unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); @@ -2169,61 +2360,6 @@ static void si_initialize_color_surface(struct si_context *sctx, /* Determine pixel shader export format */ si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); - if (sctx->b.family == CHIP_STONEY && - !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) { - switch (desc->channel[0].size) { - case 32: - if (desc->nr_channels == 1) { - if (swap == V_0280A0_SWAP_STD) - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; - else if (swap == V_0280A0_SWAP_ALT_REV) - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A; - } - break; - case 16: - /* For 1-channel formats, use the superset thereof. 
*/ - if (desc->nr_channels <= 2) { - if (swap == V_0280A0_SWAP_STD || - swap == V_0280A0_SWAP_STD_REV) - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR; - else - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR; - } - break; - case 11: - if (desc->nr_channels == 3) { - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11; - surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT; - } - break; - case 10: - if (desc->nr_channels == 4) { - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10; - surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT; - } - break; - case 8: - /* For 1 and 2-channel formats, use the superset thereof. */ - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8; - surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT; - break; - case 5: - if (desc->nr_channels == 3) { - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5; - surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT; - } else if (desc->nr_channels == 4) { - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5; - surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT; - } - break; - case 4: - /* For 1 nad 2-channel formats, use the superset thereof. */ - surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4; - surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT; - break; - } - } - surf->color_initialized = true; } @@ -2459,7 +2595,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, } si_update_poly_offset_state(sctx); - si_mark_atom_dirty(sctx, &sctx->cb_target_mask); + si_mark_atom_dirty(sctx, &sctx->cb_render_state); si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); if (sctx->framebuffer.nr_samples != old_nr_samples) { @@ -2512,8 +2648,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom unsigned i, nr_cbufs = state->nr_cbufs; struct r600_texture *tex = NULL; struct r600_surface *cb = NULL; - uint32_t sx_ps_downconvert = 0; - uint32_t sx_blend_opt_epsilon = 0; /* Colorbuffers. 
*/ for (i = 0; i < nr_cbufs; i++) { @@ -2564,29 +2698,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (sctx->b.chip_class >= VI) radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */ - - sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i); - sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i); } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0] && sctx->framebuffer.dirty_cbufs & (1 << 0)) { radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, cb->cb_color_info | tex->cb_color_info); - sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i); - sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i); i++; } for (; i < 8 ; i++) if (sctx->framebuffer.dirty_cbufs & (1 << i)) radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); - if (sctx->b.family == CHIP_STONEY) { - radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2); - radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ - radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ - } - /* ZS buffer. 
*/ if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; @@ -3374,7 +3497,7 @@ void si_init_state_functions(struct si_context *sctx) si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); - si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask); + si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); @@ -3449,8 +3572,8 @@ si_write_harvested_raster_configs(struct si_context *sctx, { unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); - unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; - unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); + unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; + unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); unsigned rb_per_se = num_rb / num_se; unsigned se_mask[4]; @@ -3579,8 +3702,8 @@ si_write_harvested_raster_configs(struct si_context *sctx, static void si_init_config(struct si_context *sctx) { struct si_screen *sscreen = sctx->screen; - unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); - unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; + unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); + unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; unsigned 
raster_config, raster_config_1; uint64_t border_color_va = sctx->border_color_buffer->gpu_address; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index be3488e6dba..507f45938ce 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -124,7 +124,7 @@ union si_state_atoms { struct r600_atom *db_render_state; struct r600_atom *msaa_config; struct r600_atom *sample_mask; - struct r600_atom *cb_target_mask; + struct r600_atom *cb_render_state; struct r600_atom *blend_color; struct r600_atom *clip_regs; struct r600_atom *clip_state; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 36174eb5a94..bbef429edc5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -705,23 +705,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, } /* Select the hw shader variant depending on the current state. */ -static int si_shader_select(struct pipe_context *ctx, - struct si_shader_ctx_state *state) +static int si_shader_select_with_key(struct pipe_context *ctx, + struct si_shader_ctx_state *state, + union si_shader_key *key) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state->cso; struct si_shader *current = state->current; - union si_shader_key key; struct si_shader *iter, *shader = NULL; int r; - si_shader_selector_key(ctx, sel, &key); - /* Check if we don't need to change anything. * This path is also used for most shaders that don't need multiple * variants, it will cost just a computation of the key and this * test. 
*/ - if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0)) + if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0)) return 0; pipe_mutex_lock(sel->mutex); @@ -730,7 +728,7 @@ static int si_shader_select(struct pipe_context *ctx, for (iter = sel->first_variant; iter; iter = iter->next_variant) { /* Don't check the "current" shader. We checked it above. */ if (current != iter && - memcmp(&iter->key, &key, sizeof(key)) == 0) { + memcmp(&iter->key, key, sizeof(*key)) == 0) { state->current = iter; pipe_mutex_unlock(sel->mutex); return 0; @@ -744,7 +742,7 @@ static int si_shader_select(struct pipe_context *ctx, return -ENOMEM; } shader->selector = sel; - shader->key = key; + shader->key = *key; r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug); if (unlikely(r)) { @@ -768,6 +766,15 @@ static int si_shader_select(struct pipe_context *ctx, return 0; } +static int si_shader_select(struct pipe_context *ctx, + struct si_shader_ctx_state *state) +{ + union si_shader_key key; + + si_shader_selector_key(ctx, state->cso, &key); + return si_shader_select_with_key(ctx, state, &key); +} + static void *si_create_shader_selector(struct pipe_context *ctx, const struct pipe_shader_state *state) { @@ -888,8 +895,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx, /* Pre-compilation. */ if (sscreen->b.debug_flags & DBG_PRECOMPILE) { struct si_shader_ctx_state state = {sel}; + union si_shader_key key; - if (si_shader_select(ctx, &state)) { + memset(&key, 0, sizeof(key)); + + /* Set reasonable defaults, so that the shader key doesn't + * cause any code to be eliminated. 
+ */ + switch (sel->type) { + case PIPE_SHADER_TESS_CTRL: + key.tcs.prim_mode = PIPE_PRIM_TRIANGLES; + break; + case PIPE_SHADER_FRAGMENT: + key.ps.alpha_func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 8; i++) + if (sel->info.colors_written & (1 << i)) + key.ps.spi_shader_col_format |= + V_028710_SPI_SHADER_FP16_ABGR << (i * 4); + break; + } + + if (si_shader_select_with_key(ctx, &state, &key)) { fprintf(stderr, "radeonsi: can't create a shader\n"); tgsi_free_tokens(sel->tokens); FREE(sel); @@ -1001,7 +1027,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) sctx->ps_shader.cso = sel; sctx->ps_shader.current = sel ? sel->first_variant : NULL; - si_mark_atom_dirty(sctx, &sctx->cb_target_mask); + si_mark_atom_dirty(sctx, &sctx->cb_render_state); } static void si_delete_shader_selector(struct pipe_context *ctx, void *state) @@ -1726,6 +1752,9 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_ps_input); } + if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) + si_mark_atom_dirty(sctx, &sctx->cb_render_state); + if (sctx->ps_db_shader_control != db_shader_control) { sctx->ps_db_shader_control = db_shader_control; si_mark_atom_dirty(sctx, &sctx->db_render_state); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 3bc580899d4..097ffe6f920 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -179,6 +179,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 1; + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return 65536; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -261,6 +263,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case 
PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 8d04222a0cd..d5405f8eacf 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -358,6 +358,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; @@ -396,6 +398,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 6e703f76499..4d03fe1ee0b 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1578,6 +1578,45 @@ static void trace_context_set_tess_state(struct pipe_context *_context, } +static void trace_context_set_shader_buffers(struct pipe_context *_context, + unsigned shader, + unsigned start, unsigned nr, + struct pipe_shader_buffer *buffers) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + struct pipe_shader_buffer *_buffers = NULL; + + trace_dump_call_begin("pipe_context", "set_shader_buffers"); + trace_dump_arg(ptr, context); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, start); + trace_dump_arg_begin("buffers"); + trace_dump_struct_array(shader_buffer, buffers, nr); + trace_dump_arg_end(); + trace_dump_call_end(); + + if (buffers) { + int i; + + 
_buffers = MALLOC(nr * sizeof(struct pipe_shader_buffer)); + if (!_buffers) + return; + + for (i = 0; i < nr; i++) { + _buffers[i] = buffers[i]; + _buffers[i].buffer = trace_resource_unwrap( + tr_context, _buffers[i].buffer); + } + } + + context->set_shader_buffers(context, shader, start, nr, _buffers); + + if (_buffers) + FREE(_buffers); +} + + static const struct debug_named_value rbug_blocker_flags[] = { {"before", 1, NULL}, {"after", 2, NULL}, @@ -1675,6 +1714,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(texture_barrier); TR_CTX_INIT(memory_barrier); TR_CTX_INIT(set_tess_state); + TR_CTX_INIT(set_shader_buffers); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 54f022a8ab6..cfbf53cf767 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -688,6 +688,24 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) } +void trace_dump_shader_buffer(const struct pipe_shader_buffer *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_shader_buffer"); + trace_dump_member(resource_ptr, state, buffer); + trace_dump_member(uint, state, buffer_offset); + trace_dump_member(uint, state, buffer_size); + trace_dump_struct_end(); +} + + void trace_dump_draw_info(const struct pipe_draw_info *state) { if (!trace_dumping_enabled_locked()) diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 117b3c75e87..4f4ade155bc 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -78,6 +78,8 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state); void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +void trace_dump_shader_buffer(const struct 
pipe_shader_buffer *buffer); + void trace_dump_draw_info(const struct pipe_draw_info *state); void trace_dump_blit_info(const struct pipe_blit_info *); diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 5d071ec862f..41660f6ac4d 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -245,10 +245,19 @@ vc4_job_submit(struct vc4_context *vc4) fprintf(stderr, "Draw call returned %s. " "Expect corruption.\n", strerror(errno)); warned = true; + } else if (!ret) { + vc4->last_emit_seqno = submit.seqno; } } - vc4->last_emit_seqno = submit.seqno; + if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) { + if (!vc4_wait_seqno(vc4->screen, + vc4->last_emit_seqno - 5, + PIPE_TIMEOUT_INFINITE, + "job throttling")) { + fprintf(stderr, "Job throttling failed\n"); + } + } if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) { if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno, diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 08c2dad8406..b19d31af6ac 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -127,6 +127,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Unsupported features. */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: @@ -199,6 +200,9 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; /* Stream output. 
*/ diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index fb2e5670ef0..18263e91e6a 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -169,6 +169,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) return vscreen->caps.caps.v1.max_tbo_size > 0; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 0; + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + return 0; case PIPE_CAP_CUBE_MAP_ARRAY: return vscreen->caps.caps.v1.bset.cube_map_array; case PIPE_CAP_TEXTURE_MULTISAMPLE: @@ -228,6 +230,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_VENDOR_ID: return 0x1af4; @@ -557,6 +561,7 @@ virgl_create_screen(struct virgl_winsys *vws) vws->get_caps(vws, &screen->caps); + screen->refcnt = 1; util_format_s3tc_init(); return &screen->base; diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h index 52e72ca4958..8cac38d7e96 100644 --- a/src/gallium/drivers/virgl/virgl_screen.h +++ b/src/gallium/drivers/virgl/virgl_screen.h @@ -28,6 +28,12 @@ struct virgl_screen { struct pipe_screen base; + + int refcnt; + + /* place for winsys to stash it's own stuff: */ + void *winsys_priv; + struct virgl_winsys *vws; struct virgl_drm_caps caps; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f69a75be50e..6c95b7b2178 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -150,6 +150,28 @@ struct pipe_context { struct pipe_query *q, boolean wait, union pipe_query_result *result); + + /** + * Get results of a query, storing into resource. Note that this may not + * be used with batch queries. 
+ * + * \param wait if true, this query will block until the result is ready + * \param result_type the type of the value being stored: + * \param index for queries that return multiple pieces of data, which + * item of that data to store (e.g. for + * PIPE_QUERY_PIPELINE_STATISTICS). + * When the index is -1, instead of the value of the query + * the driver should instead write a 1/0 to the appropriate + * location with 1 meaning that the query result is available. + */ + void (*get_query_result_resource)(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset); + /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index b46187bc8a1..800f16cd250 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -352,6 +352,8 @@ enum pipe_flush_flags * Flags for pipe_context::memory_barrier. */ #define PIPE_BARRIER_MAPPED_BUFFER (1 << 0) +#define PIPE_BARRIER_SHADER_BUFFER (1 << 1) +#define PIPE_BARRIER_QUERY_BUFFER (1 << 2) /** * Resource binding flags -- state tracker must specify in advance all @@ -375,6 +377,7 @@ enum pipe_flush_flags #define PIPE_BIND_SHADER_IMAGE (1 << 15) /* set_shader_images */ #define PIPE_BIND_COMPUTE_RESOURCE (1 << 16) /* set_compute_resources */ #define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 17) /* pipe_draw_info.indirect */ +#define PIPE_BIND_QUERY_BUFFER (1 << 18) /* get_query_result_resource */ /** * The first two flags above were previously part of the amorphous @@ -588,6 +591,7 @@ enum pipe_cap PIPE_CAP_CUBE_MAP_ARRAY, PIPE_CAP_TEXTURE_BUFFER_OBJECTS, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT, + PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY, PIPE_CAP_TGSI_TEXCOORD, PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER, PIPE_CAP_QUERY_PIPELINE_STATISTICS, @@ -645,6 +649,9 @@ enum pipe_cap PIPE_CAP_INVALIDATE_BUFFER, PIPE_CAP_GENERATE_MIPMAP, 
PIPE_CAP_STRING_MARKER, + PIPE_CAP_SURFACE_REINTERPRET_BLOCKS, + PIPE_CAP_QUERY_BUFFER_OBJECT, + PIPE_CAP_QUERY_MEMORY_INFO, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) @@ -837,6 +844,14 @@ union pipe_query_result union pipe_numeric_type_union batch[1]; }; +enum pipe_query_value_type +{ + PIPE_QUERY_TYPE_I32, + PIPE_QUERY_TYPE_U32, + PIPE_QUERY_TYPE_I64, + PIPE_QUERY_TYPE_U64, +}; + union pipe_color_union { float f[4]; diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index f868d71db23..211bc2440f9 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -57,6 +57,7 @@ struct pipe_resource; struct pipe_surface; struct pipe_transfer; struct pipe_box; +struct pipe_memory_info; /** @@ -260,6 +261,11 @@ struct pipe_screen { unsigned index, struct pipe_driver_query_group_info *info); + /** + * Query information about memory usage. + */ + void (*query_memory_info)(struct pipe_screen *screen, + struct pipe_memory_info *info); }; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index f300207d4dd..6539017b77c 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -420,7 +420,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_FSLT 110 #define TGSI_OPCODE_FSNE 111 - /* gap */ +#define TGSI_OPCODE_MEMBAR 112 #define TGSI_OPCODE_CALLNZ 113 /* gap */ #define TGSI_OPCODE_BREAKC 115 @@ -744,6 +744,11 @@ struct tgsi_instruction_memory unsigned Padding : 29; }; +#define TGSI_MEMBAR_SHADER_BUFFER (1 << 0) +#define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1) +#define TGSI_MEMBAR_SHADER_IMAGE (1 << 2) +#define TGSI_MEMBAR_SHARED (1 << 3) +#define TGSI_MEMBAR_THREAD_GROUP (1 << 4) #ifdef __cplusplus } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 2e4d2830199..ed62a33ad72 100644 --- a/src/gallium/include/pipe/p_state.h +++ 
b/src/gallium/include/pipe/p_state.h @@ -720,6 +720,19 @@ struct pipe_debug_callback void *data; }; +/** + * Information about memory usage. All sizes are in kilobytes. + */ +struct pipe_memory_info +{ + unsigned total_device_memory; /**< size of device memory, e.g. VRAM */ + unsigned avail_device_memory; /**< free device memory at the moment */ + unsigned total_staging_memory; /**< size of staging memory, e.g. GART */ + unsigned avail_staging_memory; /**< free staging memory at the moment */ + unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */ + unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */ +}; + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources index 99b623a5b59..8d178d4b18f 100644 --- a/src/gallium/state_trackers/nine/Makefile.sources +++ b/src/gallium/state_trackers/nine/Makefile.sources @@ -5,6 +5,8 @@ C_SOURCES := \ authenticatedchannel9.h \ basetexture9.c \ basetexture9.h \ + buffer9.c \ + buffer9.h \ cryptosession9.c \ cryptosession9.h \ cubetexture9.c \ diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index 69e0fa25961..8428b1bd7eb 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -563,7 +563,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) | /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */ /*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */ - D3DPMISCCAPS_FOGANDSPECULARALPHA | + D3DPMISCCAPS_FOGANDSPECULARALPHA | /* Note: documentation of the flag is wrong */ D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) | D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) | D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING | @@ -618,7 +618,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, 
pCaps->DestBlendCaps = pCaps->SrcBlendCaps; - pCaps->AlphaCmpCaps = D3DPCMPCAPS_LESS | + pCaps->AlphaCmpCaps = D3DPCMPCAPS_NEVER | + D3DPCMPCAPS_LESS | D3DPCMPCAPS_EQUAL | D3DPCMPCAPS_LESSEQUAL | D3DPCMPCAPS_GREATER | @@ -980,7 +981,8 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This, hr = NineDevice9_new(screen, &params, &caps, pPresentationParameters, pD3D9, pPresentationGroup, This->ctx, FALSE, NULL, - (struct NineDevice9 **)ppReturnedDeviceInterface); + (struct NineDevice9 **)ppReturnedDeviceInterface, + minor); if (FAILED(hr)) { DBG("Failed to create device.\n"); return hr; @@ -1041,7 +1043,8 @@ NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This, hr = NineDevice9Ex_new(screen, &params, &caps, pPresentationParameters, pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, This->ctx, - (struct NineDevice9Ex **)ppReturnedDeviceInterface); + (struct NineDevice9Ex **)ppReturnedDeviceInterface, + minor); if (FAILED(hr)) { DBG("Failed to create device.\n"); return hr; diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index d13138b7d5c..7a0959a8f3e 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -319,7 +319,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) if (tex->dirty_box.width) { for (l = min_level_dirty; l <= last_level; ++l) { - u_box_minify_2d(&box, &tex->dirty_box, l); + u_box_minify_3d(&box, &tex->dirty_box, l); NineVolume9_UploadSelf(tex->volumes[l], &box); } memset(&tex->dirty_box, 0, sizeof(tex->dirty_box)); diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c new file mode 100644 index 00000000000..b4b91ec2a02 --- /dev/null +++ b/src/gallium/state_trackers/nine/buffer9.c @@ -0,0 +1,189 @@ +/* + * Copyright 2011 Joakim Sindholt <[email protected]> + * Copyright 2015 Patrick Rudolph <[email protected]> + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "buffer9.h" +#include "device9.h" +#include "nine_helpers.h" +#include "nine_pipe.h" + +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_box.h" + +#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER) + +HRESULT +NineBuffer9_ctor( struct NineBuffer9 *This, + struct NineUnknownParams *pParams, + D3DRESOURCETYPE Type, + DWORD Usage, + UINT Size, + D3DPOOL Pool ) +{ + struct pipe_resource *info = &This->base.info; + HRESULT hr; + + DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool); + + user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL); + + This->maps = MALLOC(sizeof(struct pipe_transfer *)); + if (!This->maps) + return E_OUTOFMEMORY; + This->nmaps = 0; + This->maxmaps = 1; + This->size = Size; + + This->pipe = pParams->device->pipe; + + info->screen = pParams->device->screen; + info->target = PIPE_BUFFER; + info->format = PIPE_FORMAT_R8_UNORM; + info->width0 = Size; + info->flags = 0; + + info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE; + if (!(Usage & D3DUSAGE_WRITEONLY)) + info->bind |= PIPE_BIND_TRANSFER_READ; + + info->usage = PIPE_USAGE_DEFAULT; + if (Usage & D3DUSAGE_DYNAMIC) + info->usage = PIPE_USAGE_STREAM; + else if (Pool == D3DPOOL_SYSTEMMEM) + info->usage = PIPE_USAGE_STAGING; + + /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ + /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ + /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ + /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ + /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ + if (Usage & D3DUSAGE_SOFTWAREPROCESSING) + DBG("Application asked for Software Vertex Processing, " + "but this is unimplemented\n"); + /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */ + + info->height0 = 1; + info->depth0 = 1; + info->array_size = 1; + info->last_level = 0; + info->nr_samples = 0; + + hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, + Type, 
Pool, Usage); + return hr; +} + +void +NineBuffer9_dtor( struct NineBuffer9 *This ) +{ + if (This->maps) { + while (This->nmaps) { + NineBuffer9_Unlock(This); + } + FREE(This->maps); + } + + NineResource9_dtor(&This->base); +} + +struct pipe_resource * +NineBuffer9_GetResource( struct NineBuffer9 *This ) +{ + return NineResource9_GetResource(&This->base); +} + +HRESULT WINAPI +NineBuffer9_Lock( struct NineBuffer9 *This, + UINT OffsetToLock, + UINT SizeToLock, + void **ppbData, + DWORD Flags ) +{ + struct pipe_box box; + void *data; + unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags); + + DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n", + This, This->base.resource, + OffsetToLock, SizeToLock, Flags); + + user_assert(ppbData, E_POINTER); + user_assert(!(Flags & ~(D3DLOCK_DISCARD | + D3DLOCK_DONOTWAIT | + D3DLOCK_NO_DIRTY_UPDATE | + D3DLOCK_NOSYSLOCK | + D3DLOCK_READONLY | + D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL); + + if (This->nmaps == This->maxmaps) { + struct pipe_transfer **newmaps = + REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps, + sizeof(struct pipe_transfer *)*(This->maxmaps << 1)); + if (newmaps == NULL) + return E_OUTOFMEMORY; + + This->maxmaps <<= 1; + This->maps = newmaps; + } + + if (SizeToLock == 0) { + SizeToLock = This->size - OffsetToLock; + user_warn(OffsetToLock != 0); + } + + u_box_1d(OffsetToLock, SizeToLock, &box); + + data = This->pipe->transfer_map(This->pipe, This->base.resource, 0, + usage, &box, &This->maps[This->nmaps]); + + if (!data) { + DBG("pipe::transfer_map failed\n" + " usage = %x\n" + " box.x = %u\n" + " box.width = %u\n", + usage, box.x, box.width); + /* not sure what to return, msdn suggests this */ + if (Flags & D3DLOCK_DONOTWAIT) + return D3DERR_WASSTILLDRAWING; + return D3DERR_INVALIDCALL; + } + + DBG("returning pointer %p\n", data); + This->nmaps++; + *ppbData = data; + + return D3D_OK; +} + +HRESULT WINAPI +NineBuffer9_Unlock( struct NineBuffer9 *This ) +{ + 
DBG("This=%p\n", This); + + user_assert(This->nmaps > 0, D3DERR_INVALIDCALL); + This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); + return D3D_OK; +} diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h new file mode 100644 index 00000000000..1afd9a996ea --- /dev/null +++ b/src/gallium/state_trackers/nine/buffer9.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011 Joakim Sindholt <[email protected]> + * Copyright 2015 Patrick Rudolph <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef _NINE_BUFFER9_H_ +#define _NINE_BUFFER9_H_ + +#include "resource9.h" + +struct pipe_screen; +struct pipe_context; +struct pipe_transfer; + +struct NineBuffer9 +{ + struct NineResource9 base; + + /* G3D */ + struct pipe_context *pipe; + struct pipe_transfer **maps; + int nmaps, maxmaps; + UINT size; +}; +static inline struct NineBuffer9 * +NineBuffer9( void *data ) +{ + return (struct NineBuffer9 *)data; +} + +HRESULT +NineBuffer9_ctor( struct NineBuffer9 *This, + struct NineUnknownParams *pParams, + D3DRESOURCETYPE Type, + DWORD Usage, + UINT Size, + D3DPOOL Pool ); + +void +NineBuffer9_dtor( struct NineBuffer9 *This ); + +struct pipe_resource * +NineBuffer9_GetResource( struct NineBuffer9 *This ); + +HRESULT WINAPI +NineBuffer9_Lock( struct NineBuffer9 *This, + UINT OffsetToLock, + UINT SizeToLock, + void **ppbData, + DWORD Flags ); + +HRESULT WINAPI +NineBuffer9_Unlock( struct NineBuffer9 *This ); + +#endif /* _NINE_BUFFER9_H_ */ diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index abba2637946..460cc853942 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -181,7 +181,7 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This ) } if (This->managed_buffer) - FREE(This->managed_buffer); + align_free(This->managed_buffer); NineBaseTexture9_dtor(&This->base); } diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 0be83658928..475ef96788e 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -38,6 +38,7 @@ #include "nine_pipe.h" #include "nine_ff.h" #include "nine_dump.h" +#include "nine_limits.h" #include "pipe/p_screen.h" #include "pipe/p_context.h" @@ -81,7 +82,7 @@ static void nine_setup_fpu(void) #endif -static void +void NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) { struct NineSurface9 
*refSurf = NULL; @@ -112,8 +113,10 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) This->state.scissor.maxy = refSurf->desc.Height; } - if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) + if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) { This->state.rs[D3DRS_ZENABLE] = TRUE; + This->state.rs_advertised[D3DRS_ZENABLE] = TRUE; + } if (This->state.rs[D3DRS_ZENABLE]) NineDevice9_SetDepthStencilSurface( This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf); @@ -131,7 +134,8 @@ NineDevice9_ctor( struct NineDevice9 *This, ID3DPresentGroup *pPresentationGroup, struct d3dadapter9_context *pCTX, boolean ex, - D3DDISPLAYMODEEX *pFullscreenDisplayMode ) + D3DDISPLAYMODEEX *pFullscreenDisplayMode, + int minorVersionNum ) { unsigned i; HRESULT hr = NineUnknown_ctor(&This->base, pParams); @@ -152,6 +156,8 @@ NineDevice9_ctor( struct NineDevice9 *This, This->params = *pCreationParameters; This->ex = ex; This->present = pPresentationGroup; + This->minor_version_num = minorVersionNum; + IDirect3D9_AddRef(This->d3d9); ID3DPresentGroup_AddRef(This->present); @@ -172,6 +178,19 @@ NineDevice9_ctor( struct NineDevice9 *This, /* Create first, it messes up our state. */ This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */ + /* Available memory counter. Updated only for allocations with this device + * instance. This is the Win 7 behavior. + * Win XP shares this counter across multiple devices. */ + This->available_texture_mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY); + if (This->available_texture_mem < 4096) + This->available_texture_mem <<= 20; + else + This->available_texture_mem = UINT_MAX; + /* We cap texture memory usage to 80% of what is reported free initially + * This helps get closer Win behaviour. For example VertexBuffer allocation + * still succeeds when texture allocation fails. 
*/ + This->available_texture_limit = This->available_texture_mem * 20LL / 100LL; + /* create implicit swapchains */ This->nswapchains = ID3DPresentGroup_GetMultiheadCount(This->present); This->swapchains = CALLOC(This->nswapchains, @@ -460,7 +479,8 @@ NineDevice9_dtor( struct NineDevice9 *This ) if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) - NineUnknown_Unbind(NineUnknown(This->swapchains[i])); + if (This->swapchains[i]) + NineUnknown_Unbind(NineUnknown(This->swapchains[i])); FREE(This->swapchains); } @@ -523,17 +543,20 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This ) HRESULT WINAPI NineDevice9_TestCooperativeLevel( struct NineDevice9 *This ) { - return D3D_OK; /* TODO */ + if (NineSwapChain9_GetOccluded(This->swapchains[0])) { + This->device_needs_reset = TRUE; + return D3DERR_DEVICELOST; + } else if (This->device_needs_reset) { + return D3DERR_DEVICENOTRESET; + } + + return D3D_OK; } UINT WINAPI NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) { - const unsigned mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY); - if (mem < 4096) - return mem << 20; - else - return UINT_MAX; + return This->available_texture_mem; } HRESULT WINAPI @@ -606,6 +629,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, "pCursorBitmap=%p\n", This, XHotSpot, YHotSpot, pCursorBitmap); user_assert(pCursorBitmap, D3DERR_INVALIDCALL); + user_assert(surf->desc.Format == D3DFMT_A8R8G8B8, D3DERR_INVALIDCALL); if (This->swapchains[0]->params.Windowed) { This->cursor.w = MIN2(surf->desc.Width, 32); @@ -709,6 +733,11 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, This, pPresentationParameters, pSwapChain); user_assert(pPresentationParameters, D3DERR_INVALIDCALL); + user_assert(tmplt->params.Windowed && pPresentationParameters->Windowed, D3DERR_INVALIDCALL); + + /* TODO: this deserves more tests */ + if (!pPresentationParameters->hDeviceWindow) + pPresentationParameters->hDeviceWindow = 
This->params.hFocusWindow; hr = ID3DPresentGroup_CreateAdditionalPresent(This->present, pPresentationParameters, &present); @@ -757,11 +786,16 @@ NineDevice9_Reset( struct NineDevice9 *This, DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters); + if (NineSwapChain9_GetOccluded(This->swapchains[0])) { + This->device_needs_reset = TRUE; + return D3DERR_DEVICELOST; + } + for (i = 0; i < This->nswapchains; ++i) { D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i]; hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL); if (hr != D3D_OK) - return hr; + break; } nine_pipe_context_clear(This); @@ -772,6 +806,7 @@ NineDevice9_Reset( struct NineDevice9 *This, This, 0, (IDirect3DSurface9 *)This->swapchains[0]->buffers[0]); /* XXX: better use GetBackBuffer here ? */ + This->device_needs_reset = (hr != D3D_OK); return hr; } @@ -806,6 +841,8 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This, IDirect3DSurface9 **ppBackBuffer ) { user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL); + /* return NULL on error */ + *ppBackBuffer = NULL; user_assert(iSwapChain < This->nswapchains, D3DERR_INVALIDCALL); return NineSwapChain9_GetBackBuffer(This->swapchains[iSwapChain], @@ -1455,7 +1492,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This, struct NineSurface9 *src = NineSurface9(pSourceSurface); struct pipe_resource *dst_res = NineSurface9_GetResource(dst); struct pipe_resource *src_res = NineSurface9_GetResource(src); - const boolean zs = util_format_is_depth_or_stencil(dst_res->format); + boolean zs; struct pipe_blit_info blit; boolean scaled, clamped, ms, flip_x = FALSE, flip_y = FALSE; @@ -1470,6 +1507,9 @@ NineDevice9_StretchRect( struct NineDevice9 *This, DBG("pDestRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top, pDestRect->right, pDestRect->bottom); + user_assert(dst->base.pool == D3DPOOL_DEFAULT && + src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); + zs = util_format_is_depth_or_stencil(dst_res->format); 
user_assert(!zs || !This->in_scene, D3DERR_INVALIDCALL); user_assert(!zs || !pSourceRect || (pSourceRect->left == 0 && @@ -1493,8 +1533,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This, src_res->nr_samples, PIPE_BIND_SAMPLER_VIEW), D3DERR_INVALIDCALL); - user_assert(dst->base.pool == D3DPOOL_DEFAULT && - src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); /* We might want to permit these, but wine thinks we shouldn't. */ user_assert(!pDestRect || @@ -1668,6 +1706,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This, user_assert((surf->base.usage & D3DUSAGE_RENDERTARGET) || NineSurface9_IsOffscreenPlain(surf), D3DERR_INVALIDCALL); + user_assert(surf->desc.Format != D3DFMT_NULL, D3D_OK); + if (pRect) { x = pRect->left; y = pRect->top; @@ -1884,15 +1924,18 @@ NineDevice9_Clear( struct NineDevice9 *This, Count = 0; #endif + nine_update_state_framebuffer_clear(This); + if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR; - if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH; - if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL; + /* Ignore Z buffer if not bound */ + if (This->state.fb.zsbuf != NULL) { + if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH; + if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL; + } if (!bufs) return D3D_OK; d3dcolor_to_pipe_color_union(&rgba, Color); - nine_update_state_framebuffer(This); - rect.x1 = This->state.viewport.X; rect.y1 = This->state.viewport.Y; rect.x2 = This->state.viewport.Width + rect.x1; @@ -1935,7 +1978,6 @@ NineDevice9_Clear( struct NineDevice9 *This, /* Case we clear depth buffer (and eventually rt too). * depth buffer size is always >= rt size. 
Compare to clear region */ ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && - This->state.fb.zsbuf != NULL && rect.x2 >= zsbuf_surf->desc.Width && rect.y2 >= zsbuf_surf->desc.Height))) { DBG("Clear fast path\n"); @@ -2342,8 +2384,15 @@ NineDevice9_SetRenderState( struct NineDevice9 *This, DBG("This=%p State=%u(%s) Value=%08x\n", This, State, nine_d3drs_to_string(State), Value); + user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL); + + if (state->rs_advertised[State] == Value && likely(!This->is_recording)) + return D3D_OK; + + state->rs_advertised[State] = Value; + /* Amd hacks (equivalent to GL extensions) */ - if (State == D3DRS_POINTSIZE) { + if (unlikely(State == D3DRS_POINTSIZE)) { if (Value == RESZ_CODE) return NineDevice9_ResolveZ(This); @@ -2356,20 +2405,17 @@ NineDevice9_SetRenderState( struct NineDevice9 *This, } /* NV hack */ - if (State == D3DRS_ADAPTIVETESS_Y && - (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE]))) { + if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) { + if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE])) { state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC); state->changed.group |= NINE_STATE_BLEND; return D3D_OK; + } } - user_assert(State < Elements(state->rs), D3DERR_INVALIDCALL); - - if (likely(state->rs[State] != Value) || unlikely(This->is_recording)) { - state->rs[State] = Value; - state->changed.rs[State / 32] |= 1 << (State % 32); - state->changed.group |= nine_render_state_group[State]; - } + state->rs[State] = nine_fix_render_state_value(State, Value); + state->changed.rs[State / 32] |= 1 << (State % 32); + state->changed.group |= nine_render_state_group[State]; return D3D_OK; } @@ -2379,9 +2425,9 @@ NineDevice9_GetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue ) { - user_assert(State < Elements(This->state.rs), D3DERR_INVALIDCALL); + user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL); - *pValue = 
This->state.rs[State]; + *pValue = This->state.rs_advertised[State]; return D3D_OK; } @@ -3122,7 +3168,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, buffer_offset = 0; } else { /* SO matches vertex declaration */ - resource = dst->base.resource; + resource = NineVertexBuffer9_GetResource(dst); buffer_offset = DestIndex * vs->so->stride[0]; } target = This->pipe->create_stream_output_target(This->pipe, resource, @@ -3184,13 +3230,21 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pDecl ) { struct nine_state *state = This->update; + BOOL was_programmable_vs = This->state.programmable_vs; DBG("This=%p pDecl=%p\n", This, pDecl); if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl)) return D3D_OK; + nine_bind(&state->vdecl, pDecl); + This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t); + if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) { + state->commit |= NINE_STATE_COMMIT_CONST_VS; + state->changed.group |= NINE_STATE_VS; + } + state->changed.group |= NINE_STATE_VDECL; return D3D_OK; @@ -3262,18 +3316,21 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 *pShader ) { struct nine_state *state = This->update; + BOOL was_programmable_vs = This->state.programmable_vs; DBG("This=%p pShader=%p\n", This, pShader); if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader) return D3D_OK; + nine_bind(&state->vs, pShader); + + This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t); + /* ff -> non-ff: commit back non-ff constants */ - if (!state->vs && pShader) + if (!was_programmable_vs && This->state.programmable_vs) state->commit |= NINE_STATE_COMMIT_CONST_VS; - nine_bind(&state->vs, pShader); - state->changed.group |= NINE_STATE_VS; return D3D_OK; @@ -3499,7 +3556,8 @@ NineDevice9_SetStreamSource( struct 
NineDevice9 *This, state->vtxbuf[i].stride = Stride; state->vtxbuf[i].buffer_offset = OffsetInBytes; } - state->vtxbuf[i].buffer = pStreamData ? pVBuf9->base.resource : NULL; + pipe_resource_reference(&state->vtxbuf[i].buffer, + pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL); return D3D_OK; } @@ -3542,6 +3600,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This, (Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL); user_assert(Setting, D3DERR_INVALIDCALL); + if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting) + return D3D_OK; + state->stream_freq[StreamNumber] = Setting; if (Setting & D3DSTREAMSOURCE_INSTANCEDATA) @@ -3549,7 +3610,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This, else state->stream_instancedata_mask &= ~(1 << StreamNumber); - state->changed.stream_freq |= 1 << StreamNumber; + state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */ + if (StreamNumber != 0) + state->changed.group |= NINE_STATE_STREAMFREQ; return D3D_OK; } @@ -4013,7 +4076,8 @@ NineDevice9_new( struct pipe_screen *pScreen, struct d3dadapter9_context *pCTX, boolean ex, D3DDISPLAYMODEEX *pFullscreenDisplayMode, - struct NineDevice9 **ppOut ) + struct NineDevice9 **ppOut, + int minorVersionNum ) { BOOL lock; lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED); @@ -4021,5 +4085,5 @@ NineDevice9_new( struct pipe_screen *pScreen, NINE_NEW(Device9, ppOut, lock, /* args */ pScreen, pCreationParameters, pCaps, pPresentationParameters, pD3D9, pPresentationGroup, pCTX, - ex, pFullscreenDisplayMode); + ex, pFullscreenDisplayMode, minorVersionNum ); } diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index cbc1e61f5db..34edf0cfa48 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -137,6 +137,10 @@ struct NineDevice9 /* dummy vbo (containing 0 0 0 0) to bind if vertex shader input * 
is not bound to anything by the vertex declaration */ struct pipe_resource *dummy_vbo; + BOOL device_needs_reset; + int minor_version_num; + long long available_texture_mem; + long long available_texture_limit; }; static inline struct NineDevice9 * NineDevice9( void *data ) @@ -154,7 +158,8 @@ NineDevice9_new( struct pipe_screen *pScreen, struct d3dadapter9_context *pCTX, boolean ex, D3DDISPLAYMODEEX *pFullscreenDisplayMode, - struct NineDevice9 **ppOut ); + struct NineDevice9 **ppOut, + int minorVersionNum ); HRESULT NineDevice9_ctor( struct NineDevice9 *This, @@ -167,12 +172,15 @@ NineDevice9_ctor( struct NineDevice9 *This, ID3DPresentGroup *pPresentationGroup, struct d3dadapter9_context *pCTX, boolean ex, - D3DDISPLAYMODEEX *pFullscreenDisplayMode ); + D3DDISPLAYMODEEX *pFullscreenDisplayMode, + int minorVersionNum ); void NineDevice9_dtor( struct NineDevice9 *This ); /*** Nine private ***/ +void +NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ); struct pipe_screen * NineDevice9_GetScreen( struct NineDevice9 *This ); diff --git a/src/gallium/state_trackers/nine/device9ex.c b/src/gallium/state_trackers/nine/device9ex.c index fe8aa9b2704..11244b1bedf 100644 --- a/src/gallium/state_trackers/nine/device9ex.c +++ b/src/gallium/state_trackers/nine/device9ex.c @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "device9.h" #include "device9ex.h" +#include "nine_pipe.h" #include "swapchain9ex.h" #include "nine_helpers.h" @@ -37,7 +39,8 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This, D3DDISPLAYMODEEX *pFullscreenDisplayMode, IDirect3D9Ex *pD3D9Ex, ID3DPresentGroup *pPresentationGroup, - struct d3dadapter9_context *pCTX ) + struct d3dadapter9_context *pCTX, + int minorVersionNum ) { DBG("This=%p pParams=%p pScreen=%p pCreationParameters=%p pCaps=%p " "pPresentationParameters=%p pFullscreenDisplayMode=%p " @@ -50,7 +53,7 @@ NineDevice9Ex_ctor( struct NineDevice9Ex *This, pScreen, pCreationParameters, pCaps, pPresentationParameters, (IDirect3D9 *)pD3D9Ex, pPresentationGroup, pCTX, - TRUE, pFullscreenDisplayMode); + TRUE, pFullscreenDisplayMode, minorVersionNum); } static void @@ -158,6 +161,14 @@ NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, DBG("This=%p hDestinationWindow=%p\n", This, hDestinationWindow); + user_assert(!This->base.swapchains[0]->params.Windowed, D3D_OK); + + if (This->base.params.hFocusWindow == hDestinationWindow) { + if (NineSwapChain9_GetOccluded(This->base.swapchains[0])) + return S_PRESENT_OCCLUDED; + } else if(!NineSwapChain9_GetOccluded(This->base.swapchains[0])) { + return S_PRESENT_OCCLUDED; + } /* TODO: handle the other return values */ return D3D_OK; } @@ -221,12 +232,37 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This, if (pFullscreenDisplayMode) mode = &(pFullscreenDisplayMode[i]); hr = NineSwapChain9_Resize(This->base.swapchains[i], params, mode); if (FAILED(hr)) - return (hr == D3DERR_OUTOFVIDEOMEMORY) ? 
hr : D3DERR_DEVICELOST; + break; } NineDevice9_SetRenderTarget( (struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]); + return hr; +} + +HRESULT WINAPI +NineDevice9Ex_Reset( struct NineDevice9Ex *This, + D3DPRESENT_PARAMETERS *pPresentationParameters ) +{ + HRESULT hr = D3D_OK; + unsigned i; + + DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters); + + for (i = 0; i < This->base.nswapchains; ++i) { + D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i]; + hr = NineSwapChain9_Resize(This->base.swapchains[i], params, NULL); + if (FAILED(hr)) + break; + } + + nine_pipe_context_clear((struct NineDevice9 *)This); + nine_state_clear(&This->base.state, TRUE); + + NineDevice9_SetDefaultState((struct NineDevice9 *)This, TRUE); + NineDevice9_SetRenderTarget( + (struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]); return hr; } @@ -248,11 +284,18 @@ NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation); } +HRESULT WINAPI +NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This ) +{ + return D3D_OK; +} + + IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = { (void *)NineUnknown_QueryInterface, (void *)NineUnknown_AddRef, (void *)NineUnknown_Release, - (void *)NineDevice9_TestCooperativeLevel, + (void *)NineDevice9Ex_TestCooperativeLevel, (void *)NineDevice9_GetAvailableTextureMem, (void *)NineDevice9_EvictManagedResources, (void *)NineDevice9_GetDirect3D, @@ -265,7 +308,7 @@ IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = { (void *)NineDevice9_CreateAdditionalSwapChain, (void *)NineDevice9_GetSwapChain, (void *)NineDevice9_GetNumberOfSwapChains, - (void *)NineDevice9_Reset, + (void *)NineDevice9Ex_Reset, (void *)NineDevice9_Present, (void *)NineDevice9_GetBackBuffer, (void *)NineDevice9_GetRasterStatus, @@ -401,13 +444,14 @@ NineDevice9Ex_new( struct pipe_screen *pScreen, IDirect3D9Ex *pD3D9Ex, 
ID3DPresentGroup *pPresentationGroup, struct d3dadapter9_context *pCTX, - struct NineDevice9Ex **ppOut ) + struct NineDevice9Ex **ppOut, + int minorVersionNum ) { BOOL lock; lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED); NINE_NEW(Device9Ex, ppOut, lock, pScreen, pCreationParameters, pCaps, pPresentationParameters, - pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX); + pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX, minorVersionNum ); } diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h index 8375622d8a1..1c7e57e0974 100644 --- a/src/gallium/state_trackers/nine/device9ex.h +++ b/src/gallium/state_trackers/nine/device9ex.h @@ -44,7 +44,8 @@ NineDevice9Ex_new( struct pipe_screen *pScreen, IDirect3D9Ex *pD3D9Ex, ID3DPresentGroup *pPresentationGroup, struct d3dadapter9_context *pCTX, - struct NineDevice9Ex **ppOut ); + struct NineDevice9Ex **ppOut, + int minorVersionNum ); HRESULT WINAPI NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, @@ -73,6 +74,13 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This, DWORD dwFlags ); HRESULT WINAPI +NineDevice9Ex_Present( struct NineDevice9Ex *This, + const RECT *pSourceRect, + const RECT *pDestRect, + HWND hDestWindowOverride, + const RGNDATA *pDirtyRegion ); + +HRESULT WINAPI NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This, INT *pPriority ); @@ -141,9 +149,16 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This, D3DDISPLAYMODEEX *pFullscreenDisplayMode ); HRESULT WINAPI +NineDevice9Ex_Reset( struct NineDevice9Ex *This, + D3DPRESENT_PARAMETERS *pPresentationParameters ); + +HRESULT WINAPI NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation ); +HRESULT WINAPI +NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This ); + #endif /* _NINE_DEVICE9EX_H_ */ diff --git a/src/gallium/state_trackers/nine/guid.c 
b/src/gallium/state_trackers/nine/guid.c index 5034feb4d71..5e63d2f6629 100644 --- a/src/gallium/state_trackers/nine/guid.c +++ b/src/gallium/state_trackers/nine/guid.c @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include <stdio.h> #include "guid.h" const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } }; @@ -64,3 +65,20 @@ GUID_equal( const GUID *a, } return TRUE; } + +char* GUID_sprintf(char *guid_str, REFGUID id) { + sprintf( guid_str, + "{%08X,%04X,%04X,%02X%02X%02X%02X%02X%02X%02X%02X}", + id->Data1, + id->Data2, + id->Data3, + id->Data4[0], + id->Data4[1], + id->Data4[2], + id->Data4[3], + id->Data4[4], + id->Data4[5], + id->Data4[6], + id->Data4[7]); + return guid_str; +} diff --git a/src/gallium/state_trackers/nine/guid.h b/src/gallium/state_trackers/nine/guid.h index 1f9ff009ad8..af8f081bfb5 100644 --- a/src/gallium/state_trackers/nine/guid.h +++ b/src/gallium/state_trackers/nine/guid.h @@ -33,4 +33,8 @@ boolean GUID_equal( const GUID *a, const GUID *b ); +char* +GUID_sprintf( char *guid_str, + REFGUID id ); + #endif /* _NINE_GUID_H_ */ diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c index 860313b7f7e..401fe75e95f 100644 --- a/src/gallium/state_trackers/nine/indexbuffer9.c +++ b/src/gallium/state_trackers/nine/indexbuffer9.c @@ -40,52 +40,17 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This, struct NineUnknownParams *pParams, D3DINDEXBUFFER_DESC *pDesc ) { - struct pipe_resource *info = &This->base.info; HRESULT hr; DBG("This=%p pParams=%p pDesc=%p Usage=%s\n", This, pParams, pDesc, nine_D3DUSAGE_to_str(pDesc->Usage)); - This->pipe = pParams->device->pipe; - - info->screen = pParams->device->screen; - info->target = PIPE_BUFFER; - info->format = PIPE_FORMAT_R8_UNORM; - info->width0 = pDesc->Size; - info->flags = 0; - - info->bind = 
PIPE_BIND_INDEX_BUFFER | PIPE_BIND_TRANSFER_WRITE; - if (!(pDesc->Usage & D3DUSAGE_WRITEONLY)) - info->bind |= PIPE_BIND_TRANSFER_READ; - - info->usage = PIPE_USAGE_DEFAULT; - if (pDesc->Usage & D3DUSAGE_DYNAMIC) - info->usage = PIPE_USAGE_STREAM; - if (pDesc->Pool == D3DPOOL_SYSTEMMEM) - info->usage = PIPE_USAGE_STAGING; - - /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ - /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ - /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ - /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ - /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ - if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) - DBG("Application asked for Software Vertex Processing, " - "but this is unimplemented\n"); - - info->height0 = 1; - info->depth0 = 1; - info->array_size = 1; - info->last_level = 0; - info->nr_samples = 0; - - hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, D3DRTYPE_INDEXBUFFER, - pDesc->Pool, pDesc->Usage); + hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_INDEXBUFFER, + pDesc->Usage, pDesc->Size, pDesc->Pool); if (FAILED(hr)) return hr; - This->buffer.buffer = This->base.resource; + This->buffer.buffer = NineIndexBuffer9_GetResource(This); This->buffer.offset = 0; - This->map_count = 0; switch (pDesc->Format) { case D3DFMT_INDEX16: This->buffer.index_size = 2; break; @@ -105,9 +70,7 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This, void NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This ) { - if (This->transfer) { NineIndexBuffer9_Unlock(This); } - - NineResource9_dtor(&This->base); + NineBuffer9_dtor(&This->base); } const struct pipe_index_buffer * @@ -116,6 +79,12 @@ NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This ) return &This->buffer; } +struct pipe_resource * +NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This ) +{ + return NineBuffer9_GetResource(&This->base); +} + HRESULT WINAPI NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This, UINT OffsetToLock, @@ -123,59 +92,13 @@ NineIndexBuffer9_Lock( 
struct NineIndexBuffer9 *This, void **ppbData, DWORD Flags ) { - struct pipe_box box; - void *data; - UINT count; - const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags); - - DBG("This=%p OffsetToLock=%u SizeToLock=%u ppbData=%p Flags=%i " - "transfer=%p map_count=%u\n", This, OffsetToLock, - SizeToLock, ppbData, Flags, This->transfer, This->map_count); - - count = ++This->map_count; - - if (SizeToLock == 0) { - SizeToLock = This->desc.Size - OffsetToLock; - user_warn(OffsetToLock != 0); - } - - u_box_1d(OffsetToLock, SizeToLock, &box); - - if (unlikely(count != 1)) { - DBG("Lock has been called on already locked buffer." - "Unmapping before mapping again."); - This->pipe->transfer_unmap(This->pipe, This->transfer); - } - data = This->pipe->transfer_map(This->pipe, This->base.resource, 0, - usage, &box, &This->transfer); - if (!This->transfer) { - DBG("pipe::transfer_map failed\n" - " usage = %u\n" - " box.x = %u\n" - " box.width = %u\n", - usage, box.x, box.width); - } - *ppbData = data; - DBG("Returning memory at %p at address %p\n", *ppbData, ppbData); - - return D3D_OK; + return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags); } HRESULT WINAPI NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This ) { - DBG("This=%p\n", This); - if (!This->map_count) { - DBG("Unmap called without a previous map call.\n"); - return D3D_OK; - } - if (--This->map_count) { - DBG("Ignoring unmap.\n"); - return D3D_OK; - } - This->pipe->transfer_unmap(This->pipe, This->transfer); - This->transfer = NULL; - return D3D_OK; + return NineBuffer9_Unlock(&This->base); } HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h index f10578f47ba..f3274b71224 100644 --- a/src/gallium/state_trackers/nine/indexbuffer9.h +++ b/src/gallium/state_trackers/nine/indexbuffer9.h @@ -24,7 +24,7 @@ #define _NINE_INDEXBUFFER9_H_ #include "resource9.h" - +#include "buffer9.h" #include "pipe/p_state.h" 
struct pipe_screen; @@ -35,13 +35,10 @@ struct NineDevice9; struct NineIndexBuffer9 { - struct NineResource9 base; + struct NineBuffer9 base; /* g3d stuff */ - struct pipe_context *pipe; struct pipe_index_buffer buffer; - struct pipe_transfer *transfer; - UINT map_count; D3DINDEXBUFFER_DESC desc; }; @@ -69,6 +66,8 @@ NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This ); const struct pipe_index_buffer * NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This ); +struct pipe_resource * +NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This ); /*** Direct3D public ***/ HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index 0feaeab7330..a5466a7bdd4 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -58,7 +58,8 @@ struct nine_ff_vs_key uint32_t color0in_one : 1; uint32_t color1in_one : 1; uint32_t fog : 1; - uint32_t pad1 : 7; + uint32_t specular_enable : 1; + uint32_t pad1 : 6; uint32_t tc_dim_input: 16; /* 8 * 2 bits */ uint32_t pad2 : 16; uint32_t tc_dim_output: 24; /* 8 * 3 bits */ @@ -466,6 +467,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f)); ureg_ARL(ureg, AR, ureg_src(tmp)); } + + ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); + ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); + for (i = 0; i < key->vertexblend; ++i) { for (c = 0; c < 4; ++c) { cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c); @@ -473,22 +478,27 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); } /* multiply by WORLD(index) */ - ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]); - ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0])); - ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], 
ureg_src(r[0])); - ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0])); - - /* accumulate weighted position value */ - if (i) - ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2])); - else - ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0)); + ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); + ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); + ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); + ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); + + if (i < (key->vertexblend - 1)) { + /* accumulate weighted position value */ + ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2])); + /* subtract weighted position value for last value */ + ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i)); + } } + + /* the last weighted position is always 1 - sum_of_previous_weights */ + ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2])); + /* multiply by VIEW_PROJ */ - ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8)); - ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0])); - ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0])); - ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0])); + ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8)); + ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp)); + ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp)); + ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp)); if (need_rVtx) vs->aVtx = ureg_src(r[2]); @@ -515,10 +525,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MOV(ureg, oPos, ureg_src(tmp)); } else { /* position = vertex * WORLD_VIEW_PROJ */ - ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0)); - ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0])); - ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0])); - ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0])); + ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), 
_CONST(0)); + ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); + ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); + ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); } if (need_rVtx) { @@ -746,12 +756,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) { /* hitDir = light.position - eyeVtx * d = length(hitDir) - * hitDir /= d */ ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx)); ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); ureg_RSQ(ureg, tmp_y, _X(tmp)); - ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */ ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */ @@ -765,6 +773,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); ureg_ENDIF(ureg); + /* normalize hitDir */ + ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp); + /* if (SPOT light) */ ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); ureg_IF(ureg, _X(tmp), &label[l++]); @@ -799,9 +810,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) /* midVec = normalize(hitDir + eyeDir) */ if (key->localviewer) { ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp); - ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); + ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid)); } else { - ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f)); + ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f)); } ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp); ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid)); @@ -849,7 +860,14 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE); } - ureg_MAD(ureg, oCol[0], 
ureg_src(rD), vs->mtlD, ureg_src(tmp)); + + if (key->specular_enable) { + /* add oCol[1] to oCol[0] */ + ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp)); + ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp)); + } else { + ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); + } ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); } else /* COLOR */ @@ -1012,10 +1030,10 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; break; case D3DTA_DIFFUSE: - reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE); + reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); break; case D3DTA_SPECULAR: - reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE); + reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); break; case D3DTA_TEMP: reg = ps->rTmpSrc; @@ -1222,7 +1240,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) ps.ureg = ureg; ps.stage.index_pre_mod = -1; - ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE); + ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); /* Declare all TEMPs we might need, serious drivers have a register allocator. 
*/ for (i = 0; i < Elements(ps.r); ++i) @@ -1241,7 +1259,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) if (key->ts[s].colorarg0 == D3DTA_SPECULAR || key->ts[s].colorarg1 == D3DTA_SPECULAR || key->ts[s].colorarg2 == D3DTA_SPECULAR) - ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE); + ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); if (key->ts[s].colorarg0 == D3DTA_TEXTURE || key->ts[s].colorarg1 == D3DTA_TEXTURE || @@ -1258,7 +1276,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || key->ts[s].alphaarg1 == D3DTA_SPECULAR || key->ts[s].alphaarg2 == D3DTA_SPECULAR) - ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE); + ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || key->ts[s].alphaarg1 == D3DTA_TEXTURE || @@ -1269,7 +1287,7 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) } } if (key->specular) - ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE); + ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); @@ -1500,6 +1518,9 @@ nine_ff_get_vs(struct NineDevice9 *device) if (key.fog_mode) key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE]; + key.localviewer = !!state->rs[D3DRS_LOCALVIEWER]; + key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE]; + if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE]; @@ -1847,7 +1868,7 @@ nine_ff_update(struct NineDevice9 *device) DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps); /* NOTE: the only reference belongs to the hash table */ - if (!device->state.vs) { + if (!state->programmable_vs) { 
device->ff.vs = nine_ff_get_vs(device); device->state.changed.group |= NINE_STATE_VS; } @@ -1856,7 +1877,7 @@ nine_ff_update(struct NineDevice9 *device) device->state.changed.group |= NINE_STATE_PS; } - if (!device->state.vs) { + if (!state->programmable_vs) { nine_ff_load_vs_transforms(device); nine_ff_load_tex_matrices(device); nine_ff_load_lights(device); diff --git a/src/gallium/state_trackers/nine/nine_limits.h b/src/gallium/state_trackers/nine/nine_limits.h new file mode 100644 index 00000000000..ef1ed2566ba --- /dev/null +++ b/src/gallium/state_trackers/nine/nine_limits.h @@ -0,0 +1,211 @@ +/* + * Copyright 2015 Axel Davy <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef _NINE_LIMITS_H_ +#define _NINE_LIMITS_H_ + +#include "assert.h" +#include "d3d9types.h" + +// state can be any value +#define NINE_STATE_NO_LIMIT 0 +// value is clamped if below min or max +#define NINE_STATE_CLAMP 1 +// boolean: 0 -> false; any other value -> true +#define NINE_STATE_BOOL 2 +// a mask is applied on the value +#define NINE_STATE_MASK 3 +// if outside a range, state value is changed to a default value +#define NINE_STATE_RANGE_DEF_VAL 4 + +struct nine_state_behaviour { + unsigned state_value_behaviour; + union { + struct { + unsigned min; + unsigned max; + } clamp; + unsigned mask; + struct { + unsigned min; + unsigned max; + unsigned default_val; + } range_def_val; + } u; +}; + +#define __NO_LIMIT_RS(o) \ + [D3DRS_##o] = {NINE_STATE_NO_LIMIT} + +#define __CLAMP_RS(o, m, M) \ + [D3DRS_##o] = {NINE_STATE_CLAMP, {.clamp = {m, M}}} + +#define __BOOLEAN_RS(o) \ + [D3DRS_##o] = {NINE_STATE_BOOL} + +#define __MASK_RS(o, m) \ + [D3DRS_##o] = {NINE_STATE_MASK, {.mask = m}} + +#define __RANGE_DEF_VAL_RS(o, m, M, d) \ + [D3DRS_##o] = {NINE_STATE_RANGE_DEF_VAL, {.range_def_val = {m, M, d}}} + +#define __TO_DETERMINE_RS(o, m, M) \ + [D3DRS_##o] = {NINE_STATE_NO_LIMIT} + +static const struct nine_state_behaviour +render_state_limits_table[D3DRS_BLENDOPALPHA + 1] = { + __TO_DETERMINE_RS(ZENABLE, 0, 3), + __TO_DETERMINE_RS(FILLMODE, 1, 3), + __CLAMP_RS(SHADEMODE, 1, 3), + __BOOLEAN_RS(ZWRITEENABLE), + __BOOLEAN_RS(ALPHATESTENABLE), + __BOOLEAN_RS(LASTPIXEL), + __RANGE_DEF_VAL_RS(SRCBLEND, 1, 17, D3DBLEND_ZERO), + __RANGE_DEF_VAL_RS(DESTBLEND, 1, 17, D3DBLEND_ZERO), + __CLAMP_RS(CULLMODE, 1, 3), + __CLAMP_RS(ZFUNC, 1, 8), + __MASK_RS(ALPHAREF, 0x000000FF), + __CLAMP_RS(ALPHAFUNC, 1, 8), + __BOOLEAN_RS(DITHERENABLE), + __BOOLEAN_RS(ALPHABLENDENABLE), + __BOOLEAN_RS(FOGENABLE), + __BOOLEAN_RS(SPECULARENABLE), + __NO_LIMIT_RS(FOGCOLOR), + __MASK_RS(FOGTABLEMODE, 0x00000007), + __NO_LIMIT_RS(FOGSTART), /* a bit more complex than that, lets ignore */ + 
__NO_LIMIT_RS(FOGEND), + __NO_LIMIT_RS(FOGDENSITY), /* actually should be between 0.0 and 1.0 */ + __BOOLEAN_RS(RANGEFOGENABLE), + __BOOLEAN_RS(STENCILENABLE), + __CLAMP_RS(STENCILFAIL, 1, 8), + __CLAMP_RS(STENCILZFAIL, 1, 8), + __CLAMP_RS(STENCILPASS, 1, 8), + __CLAMP_RS(STENCILFUNC, 1, 8), + __NO_LIMIT_RS(STENCILREF), + __NO_LIMIT_RS(STENCILMASK), + __NO_LIMIT_RS(STENCILWRITEMASK), + __NO_LIMIT_RS(TEXTUREFACTOR), + __TO_DETERMINE_RS(WRAP0, 0, 15), + __TO_DETERMINE_RS(WRAP1, 0, 15), + __TO_DETERMINE_RS(WRAP2, 0, 15), + __TO_DETERMINE_RS(WRAP3, 0, 15), + __TO_DETERMINE_RS(WRAP4, 0, 15), + __TO_DETERMINE_RS(WRAP5, 0, 15), + __TO_DETERMINE_RS(WRAP6, 0, 15), + __TO_DETERMINE_RS(WRAP7, 0, 15), + __BOOLEAN_RS(CLIPPING), + __BOOLEAN_RS(LIGHTING), + __NO_LIMIT_RS(AMBIENT), + __MASK_RS(FOGVERTEXMODE, 0x00000007), + __BOOLEAN_RS(COLORVERTEX), + __BOOLEAN_RS(LOCALVIEWER), + __BOOLEAN_RS(NORMALIZENORMALS), + __TO_DETERMINE_RS(DIFFUSEMATERIALSOURCE, 0, 2), + __TO_DETERMINE_RS(SPECULARMATERIALSOURCE, 0, 2), + __TO_DETERMINE_RS(AMBIENTMATERIALSOURCE, 0, 2), + __TO_DETERMINE_RS(EMISSIVEMATERIALSOURCE, 0, 2), + __TO_DETERMINE_RS(VERTEXBLEND, 0, 256), /* values between 4 and 254 -both included- are forbidden too */ + __NO_LIMIT_RS(CLIPPLANEENABLE), /* expected check seems complex */ + __TO_DETERMINE_RS(POINTSIZE, 0, 0xFFFFFFFF), + __TO_DETERMINE_RS(POINTSIZE_MIN, 0, 0x7FFFFFFF), /* float >= 0.0 */ + __BOOLEAN_RS(POINTSPRITEENABLE), + __BOOLEAN_RS(POINTSCALEENABLE), + __TO_DETERMINE_RS(POINTSCALE_A, 0, 0x7FFFFFFF), /* float >= 0.0 */ + __TO_DETERMINE_RS(POINTSCALE_B, 0, 0x7FFFFFFF), /* float >= 0.0 */ + __TO_DETERMINE_RS(POINTSCALE_C, 0, 0x7FFFFFFF), /* float >= 0.0 */ + __BOOLEAN_RS(MULTISAMPLEANTIALIAS), + __NO_LIMIT_RS(MULTISAMPLEMASK), + __TO_DETERMINE_RS(PATCHEDGESTYLE, 0, 1), + __TO_DETERMINE_RS(DEBUGMONITORTOKEN, 0, 1), + __TO_DETERMINE_RS(POINTSIZE_MAX, 0, 0x7FFFFFFF), /* check more complex than that */ + __BOOLEAN_RS(INDEXEDVERTEXBLENDENABLE), + 
__TO_DETERMINE_RS(COLORWRITEENABLE, 0, 15), + __NO_LIMIT_RS(TWEENFACTOR), + __CLAMP_RS(BLENDOP, 1, 5), + __TO_DETERMINE_RS(POSITIONDEGREE, 1, 5), /* can actually be only 1 or 5 */ + __TO_DETERMINE_RS(NORMALDEGREE, 1, 2), + __BOOLEAN_RS(SCISSORTESTENABLE), + __NO_LIMIT_RS(SLOPESCALEDEPTHBIAS), + __BOOLEAN_RS(ANTIALIASEDLINEENABLE), + __NO_LIMIT_RS(MINTESSELLATIONLEVEL), + __NO_LIMIT_RS(MAXTESSELLATIONLEVEL), + __NO_LIMIT_RS(ADAPTIVETESS_X), + __NO_LIMIT_RS(ADAPTIVETESS_Y), + __NO_LIMIT_RS(ADAPTIVETESS_Z), + __NO_LIMIT_RS(ADAPTIVETESS_W), + __BOOLEAN_RS(ENABLEADAPTIVETESSELLATION), + __BOOLEAN_RS(TWOSIDEDSTENCILMODE), + __CLAMP_RS(CCW_STENCILFAIL, 1, 8), + __CLAMP_RS(CCW_STENCILZFAIL, 1, 8), + __CLAMP_RS(CCW_STENCILPASS, 1, 8), + __CLAMP_RS(CCW_STENCILFUNC, 1, 8), + __TO_DETERMINE_RS(COLORWRITEENABLE1, 0, 15), + __TO_DETERMINE_RS(COLORWRITEENABLE2, 0, 15), + __TO_DETERMINE_RS(COLORWRITEENABLE3, 0, 15), + __NO_LIMIT_RS(BLENDFACTOR), + __BOOLEAN_RS(SRGBWRITEENABLE), + __NO_LIMIT_RS(DEPTHBIAS), + __TO_DETERMINE_RS(WRAP8, 0, 15), + __TO_DETERMINE_RS(WRAP9, 0, 15), + __TO_DETERMINE_RS(WRAP10, 0, 15), + __TO_DETERMINE_RS(WRAP11, 0, 15), + __TO_DETERMINE_RS(WRAP12, 0, 15), + __TO_DETERMINE_RS(WRAP13, 0, 15), + __TO_DETERMINE_RS(WRAP14, 0, 15), + __TO_DETERMINE_RS(WRAP15, 0, 15), + __BOOLEAN_RS(SEPARATEALPHABLENDENABLE), + __RANGE_DEF_VAL_RS(SRCBLENDALPHA, 1, 17, D3DBLEND_ZERO), + __RANGE_DEF_VAL_RS(DESTBLENDALPHA, 1, 17, D3DBLEND_ZERO), + __CLAMP_RS(BLENDOPALPHA, 1, 5) +}; + +static DWORD inline +nine_fix_render_state_value(D3DRENDERSTATETYPE State, + DWORD Value) +{ + struct nine_state_behaviour behaviour = render_state_limits_table[State]; + + switch (behaviour.state_value_behaviour) { + case NINE_STATE_NO_LIMIT: + break; + case NINE_STATE_CLAMP: + if (Value < behaviour.u.clamp.min) + Value = behaviour.u.clamp.min; + else if (Value > behaviour.u.clamp.max) + Value = behaviour.u.clamp.max; + break; + case NINE_STATE_BOOL: + Value = Value ? 
1 : 0; + break; + case NINE_STATE_MASK: + Value = Value & behaviour.u.mask; + break; + case NINE_STATE_RANGE_DEF_VAL: + if (Value < behaviour.u.range_def_val.min || Value > behaviour.u.range_def_val.max) + Value = behaviour.u.range_def_val.default_val; + break; + } + + return Value; +} + +#endif /* _NINE_HELPERS_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_pdata.h b/src/gallium/state_trackers/nine/nine_pdata.h index 7bdd702cfbb..0e9a2aa7160 100644 --- a/src/gallium/state_trackers/nine/nine_pdata.h +++ b/src/gallium/state_trackers/nine/nine_pdata.h @@ -5,6 +5,7 @@ struct pheader { boolean unknown; + GUID guid; DWORD size; char data[1]; }; diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c index 2be30f7e097..27a10d64473 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.c +++ b/src/gallium/state_trackers/nine/nine_pipe.c @@ -181,6 +181,7 @@ nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs) } nine_convert_blend_state_fixup(&blend, rs); /* for BOTH[INV]SRCALPHA */ } + blend.rt[0].colormask = rs[D3DRS_COLORWRITEENABLE]; if (rs[D3DRS_COLORWRITEENABLE1] != rs[D3DRS_COLORWRITEENABLE] || @@ -222,8 +223,8 @@ nine_convert_sampler_state(struct cso_context *ctx, int idx, const DWORD *ss) samp.wrap_s = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSU]); samp.wrap_t = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSV]); samp.wrap_r = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSW]); - samp.min_img_filter = ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - samp.mag_img_filter = ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + samp.min_img_filter = (ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + samp.mag_img_filter = (ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? 
PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; if (ss[D3DSAMP_MINFILTER] == D3DTEXF_ANISOTROPIC || ss[D3DSAMP_MAGFILTER] == D3DTEXF_ANISOTROPIC) samp.max_anisotropy = ss[D3DSAMP_MAXANISOTROPY]; @@ -265,7 +266,7 @@ nine_pipe_context_clear(struct NineDevice9 *This) const enum pipe_format nine_d3d9_to_pipe_format_map[120] = { [D3DFMT_UNKNOWN] = PIPE_FORMAT_NONE, - [D3DFMT_R8G8B8] = PIPE_FORMAT_NONE, + [D3DFMT_R8G8B8] = PIPE_FORMAT_R8G8B8_UNORM, [D3DFMT_A8R8G8B8] = PIPE_FORMAT_B8G8R8A8_UNORM, [D3DFMT_X8R8G8B8] = PIPE_FORMAT_B8G8R8X8_UNORM, [D3DFMT_R5G6B5] = PIPE_FORMAT_B5G6R5_UNORM, @@ -323,8 +324,8 @@ const enum pipe_format nine_d3d9_to_pipe_format_map[120] = const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT] = { [PIPE_FORMAT_NONE] = D3DFMT_UNKNOWN, - -/* [PIPE_FORMAT_B8G8R8_UNORM] = D3DFMT_R8G8B8, */ + /* TODO: rename PIPE_FORMAT_R8G8B8_UNORM to PIPE_FORMAT_B8G8R8_UNORM */ + [PIPE_FORMAT_R8G8B8_UNORM] = D3DFMT_R8G8B8, [PIPE_FORMAT_B8G8R8A8_UNORM] = D3DFMT_A8R8G8B8, [PIPE_FORMAT_B8G8R8X8_UNORM] = D3DFMT_X8R8G8B8, [PIPE_FORMAT_B5G6R5_UNORM] = D3DFMT_R5G6B5, diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index ed431738abc..a7a7da27903 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -852,7 +852,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) /* the address register (vs only) must be * assigned before use */ assert(!ureg_dst_is_undef(tx->regs.a0)); - ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); + /* Round to lowest for vs1.1 (contrary to the doc), else + * round to nearest */ + if (tx->version.major < 2 && tx->version.minor < 2) + ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); + else + ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); src = ureg_src(tx->regs.address); } else { if (tx->version.major < 2 && tx->version.minor < 4) { @@ -870,9 +875,12 @@ 
tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) } else { if (tx->version.major < 3) { assert(!param->rel); - src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR, - param->idx, - TGSI_INTERPOLATE_PERSPECTIVE); + src = ureg_DECL_fs_input_cyl_centroid( + ureg, TGSI_SEMANTIC_COLOR, param->idx, + TGSI_INTERPOLATE_COLOR, 0, + tx->info->force_color_in_centroid ? + TGSI_INTERPOLATE_LOC_CENTROID : 0, + 0, 1); } else { assert(!param->rel); /* TODO */ assert(param->idx < Elements(tx->regs.v)); @@ -1163,12 +1171,9 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) assert(!param->rel); tx->info->rt_mask |= 1 << param->idx; if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { - /* ps < 3: oCol[0] will have fog blending afterward - * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ + /* ps < 3: oCol[0] will have fog blending afterward */ if (!IS_VS && tx->version.major < 3 && param->idx == 0) { tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); - } else if (IS_VS && tx->version.major < 3 && param->idx == 1) { - tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg); } else { tx->regs.oCol[param->idx] = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); @@ -1543,25 +1548,6 @@ DECL_SPECIAL(CALLNZ) return D3D_OK; } -DECL_SPECIAL(MOV_vs1x) -{ - if (tx->insn.dst[0].file == D3DSPR_ADDR) { - /* Implementation note: We don't write directly - * to the addr register, but to an intermediate - * float register. - * Contrary to the doc, when writing to ADDR here, - * the rounding is not to nearest, but to lowest - * (wine test). - * Since we use ARR next, substract 0.5. 
*/ - ureg_SUB(tx->ureg, - tx_dst_param(tx, &tx->insn.dst[0]), - tx_src_param(tx, &tx->insn.src[0]), - ureg_imm1f(tx->ureg, 0.5f)); - return D3D_OK; - } - return NineTranslateInstruction_Generic(tx); -} - DECL_SPECIAL(LOOP) { struct ureg_program *ureg = tx->ureg; @@ -1978,6 +1964,7 @@ nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) return TGSI_INTERPOLATE_LINEAR; case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_COLOR: + return TGSI_INTERPOLATE_COLOR; case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_TEXCOORD: @@ -2058,13 +2045,17 @@ DECL_SPECIAL(DCL) } } else { if (is_input && tx->version.major >= 3) { + unsigned interp_location = 0; /* SM3 only, SM2 input semantic determined by file */ assert(sem.reg.idx < Elements(tx->regs.v)); + if (sem.reg.mod & NINED3DSPDM_CENTROID || + (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) + interp_location = TGSI_INTERPOLATE_LOC_CENTROID; tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid( ureg, tgsi.Name, tgsi.Index, nine_tgsi_to_interp_mode(&tgsi), 0, /* cylwrap */ - sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1); + interp_location, 0, 1); } else if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ /* FragColor or FragDepth */ @@ -2736,8 +2727,7 @@ DECL_SPECIAL(COMMENT) struct sm1_op_info inst_table[] = { _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */ - _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)), - _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), + _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */ _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ @@ -3426,13 +3416,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 
0.0f)); } - /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ - if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) { - struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1); - ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1])); - ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f)); - } - if (info->position_t) ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index 41577ac572b..1fe0c4bd182 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -61,6 +61,7 @@ struct nine_shader_info uint8_t fog_enable; uint8_t fog_mode; + uint8_t force_color_in_centroid; uint16_t projected; /* ps 1.1 to 1.3 */ unsigned const_i_base; /* in vec4 (16 byte) units */ diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index aee31622088..6f94e378984 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -367,14 +367,14 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) uint32_t changed_group = 0; int has_key_changed = 0; - if (likely(vs)) + if (likely(state->programmable_vs)) has_key_changed = NineVertexShader9_UpdateKey(vs, state); if (!shader_changed && !has_key_changed) return 0; /* likely because we dislike FF */ - if (likely(vs)) { + if (likely(state->programmable_vs)) { state->cso.vs = NineVertexShader9_GetVariant(vs); } else { vs = device->ff.vs; @@ -427,8 +427,8 @@ prepare_ps(struct NineDevice9 *device, uint8_t shader_changed) /* State preparation + State commit */ -static uint32_t -update_framebuffer(struct NineDevice9 *device) +static void +update_framebuffer(struct NineDevice9 *device, bool is_clear) { struct pipe_context *pipe = 
device->pipe; struct nine_state *state = &device->state; @@ -438,7 +438,8 @@ update_framebuffer(struct NineDevice9 *device) unsigned w = rt0->desc.Width; unsigned h = rt0->desc.Height; D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; - unsigned mask = state->ps ? state->ps->rt_mask : 1; + unsigned ps_mask = state->ps ? state->ps->rt_mask : 1; + unsigned mask = is_clear ? 0xf : ps_mask; const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; DBG("\n"); @@ -498,13 +499,13 @@ update_framebuffer(struct NineDevice9 *device) pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ - return state->changed.group; + if (is_clear && state->rt_mask == ps_mask) + state->changed.group &= ~NINE_STATE_FB; } static void update_viewport(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; const D3DVIEWPORT9 *vport = &device->state.viewport; struct pipe_viewport_state pvport; @@ -543,7 +544,7 @@ update_viewport(struct NineDevice9 *device) pvport.translate[1] -= 1.0f / 128.0f; } - pipe->set_viewport_states(pipe, 0, 1, &pvport); + cso_set_viewport(device->cso, &pvport); } /* Loop through VS inputs and pick the vertex elements with the declared @@ -567,7 +568,7 @@ update_vertex_elements(struct NineDevice9 *device) state->stream_usage_mask = 0; memset(vdecl_index_map, -1, 16); memset(used_streams, 0, device->caps.MaxStreams); - vs = device->state.vs ? device->state.vs : device->ff.vs; + vs = state->programmable_vs ? device->state.vs : device->ff.vs; if (vdecl) { for (n = 0; n < vs->num_inputs; ++n) { @@ -761,7 +762,7 @@ update_textures_and_samplers(struct NineDevice9 *device) cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT); commit_samplers = FALSE; - sampler_mask = state->vs ? state->vs->sampler_mask : 0; + sampler_mask = state->programmable_vs ? 
state->vs->sampler_mask : 0; state->bound_samplers_mask_vs = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) { const unsigned s = NINE_SAMPLER_VS(i); @@ -854,7 +855,7 @@ commit_vs_constants(struct NineDevice9 *device) { struct pipe_context *pipe = device->pipe; - if (unlikely(!device->state.vs)) + if (unlikely(!device->state.programmable_vs)) pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); else pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); @@ -913,7 +914,8 @@ commit_ps(struct NineDevice9 *device) NINE_STATE_DSA | \ NINE_STATE_VIEWPORT | \ NINE_STATE_VDECL | \ - NINE_STATE_IDXBUF) + NINE_STATE_IDXBUF | \ + NINE_STATE_STREAMFREQ) #define NINE_STATE_RARE \ (NINE_STATE_SCISSOR | \ @@ -934,16 +936,14 @@ validate_textures(struct NineDevice9 *device) } void -nine_update_state_framebuffer(struct NineDevice9 *device) +nine_update_state_framebuffer_clear(struct NineDevice9 *device) { struct nine_state *state = &device->state; validate_textures(device); if (state->changed.group & NINE_STATE_FB) - update_framebuffer(device); - - state->changed.group &= ~NINE_STATE_FB; + update_framebuffer(device, TRUE); } boolean @@ -964,7 +964,7 @@ nine_update_state(struct NineDevice9 *device) validate_textures(device); /* may clobber state */ /* ff_update may change VS/PS dirty bits */ - if (unlikely(!state->vs || !state->ps)) + if (unlikely(!state->programmable_vs || !state->ps)) nine_ff_update(device); group = state->changed.group; @@ -977,15 +977,14 @@ nine_update_state(struct NineDevice9 *device) if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) { if (group & NINE_STATE_FB) - group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */ + update_framebuffer(device, FALSE); if (group & NINE_STATE_BLEND) prepare_blend(device); if (group & NINE_STATE_DSA) prepare_dsa(device); if (group & NINE_STATE_VIEWPORT) update_viewport(device); - if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || - 
state->changed.stream_freq & ~1) + if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ)) update_vertex_elements(device); if (group & NINE_STATE_IDXBUF) commit_index_buffer(device); @@ -997,12 +996,12 @@ nine_update_state(struct NineDevice9 *device) if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) update_textures_and_samplers(device); if (device->prefer_user_constbuf) { - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs) prepare_vs_constants_userbuf(device); if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) prepare_ps_constants_userbuf(device); } else { - if ((group & NINE_STATE_VS_CONST) && state->vs) + if ((group & NINE_STATE_VS_CONST) && state->programmable_vs) upload_constants(device, PIPE_SHADER_VERTEX); if ((group & NINE_STATE_PS_CONST) && state->ps) upload_constants(device, PIPE_SHADER_FRAGMENT); @@ -1262,6 +1261,8 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, */ state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize); + memcpy(state->rs_advertised, state->rs, sizeof(state->rs)); + /* Set changed flags to initialize driver. 
*/ state->changed.group = NINE_STATE_ALL; @@ -1314,8 +1315,10 @@ nine_state_clear(struct nine_state *state, const boolean device) nine_bind(&state->vs, NULL); nine_bind(&state->ps, NULL); nine_bind(&state->vdecl, NULL); - for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) { nine_bind(&state->stream[i], NULL); + pipe_resource_reference(&state->vtxbuf[i].buffer, NULL); + } nine_bind(&state->idxbuf, NULL); for (i = 0; i < NINE_MAX_SAMPLERS; ++i) { if (device && diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index b34da70ef48..a4ec4e3b63a 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -61,23 +61,24 @@ #define NINE_STATE_SAMPLER (1 << 11) #define NINE_STATE_VDECL (1 << 12) #define NINE_STATE_IDXBUF (1 << 13) -#define NINE_STATE_PRIM (1 << 14) -#define NINE_STATE_MATERIAL (1 << 15) -#define NINE_STATE_BLEND_COLOR (1 << 16) -#define NINE_STATE_STENCIL_REF (1 << 17) -#define NINE_STATE_SAMPLE_MASK (1 << 18) -#define NINE_STATE_FF (0x1f << 19) -#define NINE_STATE_FF_VS (0x17 << 19) -#define NINE_STATE_FF_PS (0x18 << 19) -#define NINE_STATE_FF_LIGHTING (1 << 19) -#define NINE_STATE_FF_MATERIAL (1 << 20) -#define NINE_STATE_FF_VSTRANSF (1 << 21) -#define NINE_STATE_FF_PSSTAGES (1 << 22) -#define NINE_STATE_FF_OTHER (1 << 23) -#define NINE_STATE_FOG_SHADER (1 << 24) -#define NINE_STATE_PS1X_SHADER (1 << 25) -#define NINE_STATE_ALL 0x3ffffff -#define NINE_STATE_UNHANDLED (1 << 26) +#define NINE_STATE_STREAMFREQ (1 << 14) +#define NINE_STATE_PRIM (1 << 15) +#define NINE_STATE_MATERIAL (1 << 16) +#define NINE_STATE_BLEND_COLOR (1 << 17) +#define NINE_STATE_STENCIL_REF (1 << 18) +#define NINE_STATE_SAMPLE_MASK (1 << 19) +#define NINE_STATE_FF (0x1f << 20) +#define NINE_STATE_FF_VS (0x17 << 20) +#define NINE_STATE_FF_PS (0x18 << 20) +#define NINE_STATE_FF_LIGHTING (1 << 20) +#define NINE_STATE_FF_MATERIAL (1 << 21) +#define 
NINE_STATE_FF_VSTRANSF (1 << 22) +#define NINE_STATE_FF_PSSTAGES (1 << 23) +#define NINE_STATE_FF_OTHER (1 << 24) +#define NINE_STATE_FOG_SHADER (1 << 25) +#define NINE_STATE_PS1X_SHADER (1 << 26) +#define NINE_STATE_ALL 0x7ffffff +#define NINE_STATE_UNHANDLED (1 << 27) #define NINE_STATE_COMMIT_DSA (1 << 0) #define NINE_STATE_COMMIT_RASTERIZER (1 << 1) @@ -152,6 +153,7 @@ struct nine_state int vs_const_i[NINE_MAX_CONST_I][4]; BOOL vs_const_b[NINE_MAX_CONST_B]; float *vs_lconstf_temp; + BOOL programmable_vs; struct NinePixelShader9 *ps; float *ps_const_f; @@ -179,6 +181,7 @@ struct nine_state uint8_t rt_mask; DWORD rs[NINED3DRS_COUNT]; + DWORD rs_advertised[NINED3DRS_COUNT]; /* the ones apps get with GetRenderState */ struct NineBaseTexture9 *texture[NINE_MAX_SAMPLERS]; /* PS, DMAP, VS */ @@ -236,7 +239,7 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32]; struct NineDevice9; -void nine_update_state_framebuffer(struct NineDevice9 *); +void nine_update_state_framebuffer_clear(struct NineDevice9 *); boolean nine_update_state(struct NineDevice9 *); void nine_state_restore_non_cso(struct NineDevice9 *device); diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 42bc349c2cc..00be67f8955 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -160,6 +160,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This ) info.sampler_ps1xtypes = key; info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE]; + info.force_color_in_centroid = key >> 34 & 1; info.projected = (key >> 48) & 0xffff; hr = nine_translate_shader(This->base.device, &info); diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index e09009f6621..6b431813a81 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ 
b/src/gallium/state_trackers/nine/pixelshader9.h @@ -28,6 +28,7 @@ #include "nine_state.h" #include "basetexture9.h" #include "nine_ff.h" +#include "surface9.h" struct nine_lconstf; @@ -92,6 +93,10 @@ NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps, key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33; } + /* centroid interpolation automatically used for color ps inputs */ + if (state->rt[0]->desc.MultiSampleType > 1) + key |= ((uint64_t)1) << 34; + if (unlikely(ps->byte_code.version < 0x14)) { projected = nine_ff_get_projected_key(state); key |= ((uint64_t) projected) << 48; diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c index 6d915338b24..b929c50a83c 100644 --- a/src/gallium/state_trackers/nine/resource9.c +++ b/src/gallium/state_trackers/nine/resource9.c @@ -29,12 +29,12 @@ #include "util/u_hash_table.h" #include "util/u_inlines.h" +#include "util/u_resource.h" #include "nine_pdata.h" #define DBG_CHANNEL DBG_RESOURCE - HRESULT NineResource9_ctor( struct NineResource9 *This, struct NineUnknownParams *pParams, @@ -62,6 +62,33 @@ NineResource9_ctor( struct NineResource9 *This, if (Allocate) { assert(!initResource); + + /* On Windows it is possible allocation fails when + * IDirect3DDevice9::GetAvailableTextureMem() still reports + * enough free space. + * + * Some games allocate surfaces + * in a loop until they receive D3DERR_OUTOFVIDEOMEMORY to measure + * the available texture memory size. + * + * We are not using the drivers VRAM statistics because: + * * This would add overhead to each resource allocation. + * * Freeing memory is lazy and takes some time, but applications + * expects the memory counter to change immediately after allocating + * or freeing memory. + * + * Vertexbuffers and indexbuffers are not accounted ! 
+ */ + if (This->info.target != PIPE_BUFFER) { + This->size = util_resource_size(&This->info); + + This->base.device->available_texture_mem -= This->size; + if (This->base.device->available_texture_mem <= + This->base.device->available_texture_limit) { + return D3DERR_OUTOFVIDEOMEMORY; + } + } + DBG("(%p) Creating pipe_resource.\n", This); This->resource = screen->resource_create(screen, &This->info); if (!This->resource) @@ -92,6 +119,10 @@ NineResource9_dtor( struct NineResource9 *This ) * still hold a reference. */ pipe_resource_reference(&This->resource, NULL); + /* NOTE: size is 0, unless something has actually been allocated */ + if (This->base.device) + This->base.device->available_texture_mem += This->size; + NineUnknown_dtor(&This->base); } @@ -117,9 +148,10 @@ NineResource9_SetPrivateData( struct NineResource9 *This, enum pipe_error err; struct pheader *header; const void *user_data = pData; + char guid_str[64]; - DBG("This=%p refguid=%p pData=%p SizeOfData=%u Flags=%x\n", - This, refguid, pData, SizeOfData, Flags); + DBG("This=%p GUID=%s pData=%p SizeOfData=%u Flags=%x\n", + This, GUID_sprintf(guid_str, refguid), pData, SizeOfData, Flags); if (Flags & D3DSPD_IUNKNOWN) user_assert(SizeOfData == sizeof(IUnknown *), D3DERR_INVALIDCALL); @@ -141,8 +173,9 @@ NineResource9_SetPrivateData( struct NineResource9 *This, header->size = SizeOfData; memcpy(header->data, user_data, header->size); + memcpy(&header->guid, refguid, sizeof(header->guid)); - err = util_hash_table_set(This->pdata, refguid, header); + err = util_hash_table_set(This->pdata, &header->guid, header); if (err == PIPE_OK) { if (header->unknown) { IUnknown_AddRef(*(IUnknown **)header->data); } return D3D_OK; @@ -162,9 +195,10 @@ NineResource9_GetPrivateData( struct NineResource9 *This, { struct pheader *header; DWORD sizeofdata; + char guid_str[64]; - DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n", - This, refguid, pData, pSizeOfData); + DBG("This=%p GUID=%s pData=%p pSizeOfData=%p\n", + This, 
GUID_sprintf(guid_str, refguid), pData, pSizeOfData); header = util_hash_table_get(This->pdata, refguid); if (!header) { return D3DERR_NOTFOUND; } @@ -191,8 +225,9 @@ NineResource9_FreePrivateData( struct NineResource9 *This, REFGUID refguid ) { struct pheader *header; + char guid_str[64]; - DBG("This=%p refguid=%p\n", This, refguid); + DBG("This=%p GUID=%s\n", This, GUID_sprintf(guid_str, refguid)); header = util_hash_table_get(This->pdata, refguid); if (!header) diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h index 906f90806ce..8122257b7a7 100644 --- a/src/gallium/state_trackers/nine/resource9.h +++ b/src/gallium/state_trackers/nine/resource9.h @@ -45,6 +45,8 @@ struct NineResource9 /* for [GS]etPrivateData/FreePrivateData */ struct util_hash_table *pdata; + + long long size; }; static inline struct NineResource9 * NineResource9( void *data ) diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 6d6e1be0b7f..0d1a04b657a 100644 --- a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -24,6 +24,7 @@ #include "device9.h" #include "basetexture9.h" #include "nine_helpers.h" +#include "vertexdeclaration9.h" #define DBG_CHANNEL DBG_STATEBLOCK @@ -179,6 +180,7 @@ nine_state_copy_common(struct nine_state *dst, const int r = ffs(m) - 1; m &= ~(1 << r); dst->rs[i * 32 + r] = src->rs[i * 32 + r]; + dst->rs_advertised[i * 32 + r] = src->rs_advertised[i * 32 + r]; } } @@ -223,7 +225,7 @@ nine_state_copy_common(struct nine_state *dst, nine_bind(&dst->stream[i], src->stream[i]); if (src->stream[i]) { dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset; - dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer; + pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer); dst->vtxbuf[i].stride = src->vtxbuf[i].stride; } } @@ -269,6 +271,10 @@ nine_state_copy_common(struct nine_state *dst, dst->ff.light = 
REALLOC(dst->ff.light, dst->ff.num_lights * sizeof(D3DLIGHT9), mask->ff.num_lights * sizeof(D3DLIGHT9)); + for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) { + memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9)); + dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; + } dst->ff.num_lights = mask->ff.num_lights; } for (i = 0; i < mask->ff.num_lights; ++i) @@ -353,6 +359,7 @@ nine_state_copy_common_all(struct nine_state *dst, /* Render states. */ memcpy(dst->rs, src->rs, sizeof(dst->rs)); + memcpy(dst->rs_advertised, src->rs_advertised, sizeof(dst->rs_advertised)); if (apply) memcpy(dst->changed.rs, src->changed.rs, sizeof(dst->changed.rs)); @@ -377,7 +384,7 @@ nine_state_copy_common_all(struct nine_state *dst, nine_bind(&dst->stream[i], src->stream[i]); if (src->stream[i]) { dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset; - dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer; + pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer); dst->vtxbuf[i].stride = src->vtxbuf[i].stride; } dst->stream_freq[i] = src->stream_freq[i]; @@ -486,7 +493,10 @@ NineStateBlock9_Apply( struct NineStateBlock9 *This ) nine_state_copy_common(dst, src, src, TRUE, pool); if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl) - nine_bind(&dst->vdecl, src->vdecl); + NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl); + + /* Recomputing it is needed if we changed vs but not vdecl */ + dst->programmable_vs = dst->vs && !(dst->vdecl && dst->vdecl->position_t); /* Textures */ if (src->changed.texture) { diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 14c1ce927ad..f88b75c3dd7 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -56,6 +56,9 @@ NineSurface9_ctor( struct NineSurface9 *This, D3DSURFACE_DESC *pDesc ) { HRESULT hr; + union pipe_color_union rgba = {0}; + struct pipe_surface *surf; + struct 
pipe_context *pipe = pParams->device->pipe; DBG("This=%p pDevice=%p pResource=%p Level=%u Layer=%u pDesc=%p\n", This, pParams->device, pResource, Level, Layer, pDesc); @@ -140,6 +143,12 @@ NineSurface9_ctor( struct NineSurface9 *This, if (pResource && NineSurface9_IsOffscreenPlain(This)) pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE; + /* TODO: investigate what else exactly needs to be cleared */ + if (This->base.resource && (pDesc->Usage & D3DUSAGE_RENDERTARGET)) { + surf = NineSurface9_GetSurface(This, 0); + pipe->clear_render_target(pipe, surf, &rgba, 0, 0, pDesc->Width, pDesc->Height); + } + NineSurface9_Dump(This); return D3D_OK; @@ -156,7 +165,7 @@ NineSurface9_dtor( struct NineSurface9 *This ) /* Release system memory when we have to manage it (no parent) */ if (!This->base.base.container && This->data) - FREE(This->data); + align_free(This->data); NineResource9_dtor(&This->base); } @@ -348,7 +357,7 @@ NineSurface9_LockRect( struct NineSurface9 *This, D3DERR_INVALIDCALL); if (pRect && This->desc.Pool == D3DPOOL_DEFAULT && - compressed_format (This->desc.Format)) { + util_format_is_compressed(This->base.info.format)) { const unsigned w = util_format_get_blockwidth(This->base.info.format); const unsigned h = util_format_get_blockheight(This->base.info.format); user_assert((pRect->left == 0 && pRect->right == This->desc.Width && @@ -384,8 +393,8 @@ NineSurface9_LockRect( struct NineSurface9 *This, * and bpp 8, and the app has a workaround to work with the fact * that it is actually compressed. 
*/ if (is_ATI1_ATI2(This->base.info.format)) { - pLockedRect->Pitch = This->desc.Height; - pLockedRect->pBits = This->data + box.y * This->desc.Height + box.x; + pLockedRect->Pitch = This->desc.Width; + pLockedRect->pBits = This->data + box.y * This->desc.Width + box.x; } else { pLockedRect->Pitch = This->stride; pLockedRect->pBits = NineSurface9_GetSystemMemPointer(This, diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index 3b1a7a4493c..82d4173fbb2 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -118,6 +118,14 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, DBG("This=%p pParams=%p\n", This, pParams); user_assert(pParams != NULL, E_POINTER); + user_assert(pParams->SwapEffect, D3DERR_INVALIDCALL); + user_assert((pParams->SwapEffect != D3DSWAPEFFECT_COPY) || + (pParams->BackBufferCount <= 1), D3DERR_INVALIDCALL); + user_assert(pDevice->ex || pParams->BackBufferCount <= 3, D3DERR_INVALIDCALL); + user_assert(pDevice->ex || + (pParams->SwapEffect == D3DSWAPEFFECT_FLIP) || + (pParams->SwapEffect == D3DSWAPEFFECT_COPY) || + (pParams->SwapEffect == D3DSWAPEFFECT_DISCARD), D3DERR_INVALIDCALL); DBG("pParams(%p):\n" "BackBufferWidth: %u\n" @@ -145,11 +153,6 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, pParams->FullScreen_RefreshRateInHz, pParams->PresentationInterval); - if (pParams->SwapEffect == D3DSWAPEFFECT_COPY && - pParams->BackBufferCount > 1) { - pParams->BackBufferCount = 1; - } - if (pParams->BackBufferCount > 3) { pParams->BackBufferCount = 3; } @@ -713,6 +716,10 @@ present( struct NineSwapChain9 *This, This->pipe->blit(This->pipe, &blit); } + /* The resource we present has to resolve fast clears + * if needed (and other things) */ + This->pipe->flush_resource(This->pipe, resource); + if (This->params.SwapEffect != D3DSWAPEFFECT_DISCARD) handle_draw_cursor_and_hud(This, resource); @@ -738,12 +745,6 @@ bypass_rendering: return 
D3DERR_WASSTILLDRAWING; } - if (This->present_buffers) - resource = This->present_buffers[0]; - else - resource = This->buffers[0]->base.resource; - This->pipe->flush_resource(This->pipe, resource); - if (!This->enable_threadpool) { This->tasks[0]=NULL; fence = swap_fences_pop_front(This); @@ -786,6 +787,19 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, if (hr == D3DERR_WASSTILLDRAWING) return hr; + if (This->base.device->ex) { + if (NineSwapChain9_GetOccluded(This)) { + return S_PRESENT_OCCLUDED; + } + } else { + if (NineSwapChain9_GetOccluded(This)) { + This->base.device->device_needs_reset = TRUE; + } + if (This->base.device->device_needs_reset) { + return D3DERR_DEVICELOST; + } + } + switch (This->params.SwapEffect) { case D3DSWAPEFFECT_FLIP: UNTESTED(4); @@ -840,7 +854,6 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); This->base.device->state.changed.group |= NINE_STATE_FB; - nine_update_state_framebuffer(This->base.device); return hr; } @@ -907,8 +920,9 @@ NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, DBG("GetBackBuffer: This=%p iBackBuffer=%d Type=%d ppBackBuffer=%p\n", This, iBackBuffer, Type, ppBackBuffer); (void)user_error(Type == D3DBACKBUFFER_TYPE_MONO); + /* don't touch ppBackBuffer on error */ + user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL); user_assert(iBackBuffer < This->params.BackBufferCount, D3DERR_INVALIDCALL); - user_assert(ppBackBuffer != NULL, E_POINTER); NineUnknown_AddRef(NineUnknown(This->buffers[iBackBuffer])); *ppBackBuffer = (IDirect3DSurface9 *)This->buffers[iBackBuffer]; @@ -990,3 +1004,13 @@ NineSwapChain9_new( struct NineDevice9 *pDevice, implicit, pPresent, pPresentationParameters, pCTX, hFocusWindow, NULL); } + +BOOL +NineSwapChain9_GetOccluded( struct NineSwapChain9 *This ) +{ + if (This->base.device->minor_version_num > 0) { + return ID3DPresent_GetWindowOccluded(This->present); + } + + return FALSE; +} diff --git 
a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h index 5e48dde5004..4bd74f7b6ec 100644 --- a/src/gallium/state_trackers/nine/swapchain9.h +++ b/src/gallium/state_trackers/nine/swapchain9.h @@ -139,4 +139,7 @@ HRESULT WINAPI NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ); +BOOL +NineSwapChain9_GetOccluded( struct NineSwapChain9 *This ); + #endif /* _NINE_SWAPCHAIN9_H_ */ diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index bc325c1335e..ada08cea90a 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -235,7 +235,7 @@ NineTexture9_dtor( struct NineTexture9 *This ) } if (This->managed_buffer) - FREE(This->managed_buffer); + align_free(This->managed_buffer); NineBaseTexture9_dtor(&This->base); } diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c index 8e2eaaf8ff9..10311b428fe 100644 --- a/src/gallium/state_trackers/nine/vertexbuffer9.c +++ b/src/gallium/state_trackers/nine/vertexbuffer9.c @@ -39,56 +39,13 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This, struct NineUnknownParams *pParams, D3DVERTEXBUFFER_DESC *pDesc ) { - struct pipe_resource *info = &This->base.info; HRESULT hr; DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, pDesc->Size, pDesc->Usage, pDesc->Pool); - user_assert(pDesc->Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL); - - This->maps = MALLOC(sizeof(struct pipe_transfer *)); - if (!This->maps) - return E_OUTOFMEMORY; - This->nmaps = 0; - This->maxmaps = 1; - - This->pipe = pParams->device->pipe; - - info->screen = pParams->device->screen; - info->target = PIPE_BUFFER; - info->format = PIPE_FORMAT_R8_UNORM; - info->width0 = pDesc->Size; - info->flags = 0; - - info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE; - if (!(pDesc->Usage & D3DUSAGE_WRITEONLY)) - 
info->bind |= PIPE_BIND_TRANSFER_READ; - - info->usage = PIPE_USAGE_DEFAULT; - if (pDesc->Usage & D3DUSAGE_DYNAMIC) - info->usage = PIPE_USAGE_STREAM; - if (pDesc->Pool == D3DPOOL_SYSTEMMEM) - info->usage = PIPE_USAGE_STAGING; - - /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ - /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ - /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ - /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ - /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ - if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) - DBG("Application asked for Software Vertex Processing, " - "but this is unimplemented\n"); - /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */ - - info->height0 = 1; - info->depth0 = 1; - info->array_size = 1; - info->last_level = 0; - info->nr_samples = 0; - - hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, - D3DRTYPE_VERTEXBUFFER, pDesc->Pool, pDesc->Usage); + hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_VERTEXBUFFER, + pDesc->Usage, pDesc->Size, pDesc->Pool); if (FAILED(hr)) return hr; @@ -102,85 +59,29 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This, void NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This ) { - if (This->maps) { - while (This->nmaps) { - NineVertexBuffer9_Unlock(This); - } - FREE(This->maps); - } - - NineResource9_dtor(&This->base); + NineBuffer9_dtor(&This->base); +} + +struct pipe_resource * +NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This ) +{ + return NineBuffer9_GetResource(&This->base); } HRESULT WINAPI NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This, - UINT OffsetToLock, - UINT SizeToLock, - void **ppbData, - DWORD Flags ) + UINT OffsetToLock, + UINT SizeToLock, + void **ppbData, + DWORD Flags ) { - struct pipe_box box; - void *data; - const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags); - - DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n", - This, This->base.resource, - OffsetToLock, SizeToLock, Flags); - - user_assert(ppbData, 
E_POINTER); - user_assert(!(Flags & ~(D3DLOCK_DISCARD | - D3DLOCK_DONOTWAIT | - D3DLOCK_NO_DIRTY_UPDATE | - D3DLOCK_NOSYSLOCK | - D3DLOCK_READONLY | - D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL); - - if (This->nmaps == This->maxmaps) { - struct pipe_transfer **newmaps = - REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps, - sizeof(struct pipe_transfer *)*(This->maxmaps << 1)); - if (newmaps == NULL) - return E_OUTOFMEMORY; - - This->maxmaps <<= 1; - This->maps = newmaps; - } - - if (SizeToLock == 0) { - SizeToLock = This->desc.Size - OffsetToLock; - user_warn(OffsetToLock != 0); - } - - u_box_1d(OffsetToLock, SizeToLock, &box); - - data = This->pipe->transfer_map(This->pipe, This->base.resource, 0, - usage, &box, &This->maps[This->nmaps]); - if (!data) { - DBG("pipe::transfer_map failed\n" - " usage = %x\n" - " box.x = %u\n" - " box.width = %u\n", - usage, box.x, box.width); - /* not sure what to return, msdn suggests this */ - if (Flags & D3DLOCK_DONOTWAIT) - return D3DERR_WASSTILLDRAWING; - return D3DERR_INVALIDCALL; - } - - This->nmaps++; - *ppbData = data; - - return D3D_OK; + return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags); } HRESULT WINAPI NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This ) { - DBG("This=%p\n", This); - - user_assert(This->nmaps > 0, D3DERR_INVALIDCALL); - This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); - return D3D_OK; + return NineBuffer9_Unlock(&This->base); } HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h index 6174de4df08..859402b925b 100644 --- a/src/gallium/state_trackers/nine/vertexbuffer9.h +++ b/src/gallium/state_trackers/nine/vertexbuffer9.h @@ -22,8 +22,8 @@ #ifndef _NINE_VERTEXBUFFER9_H_ #define _NINE_VERTEXBUFFER9_H_ - #include "resource9.h" +#include "buffer9.h" struct pipe_screen; struct pipe_context; @@ -31,13 +31,10 @@ struct pipe_transfer; struct NineVertexBuffer9 { - struct 
NineResource9 base; + struct NineBuffer9 base; /* G3D */ struct pipe_context *pipe; - struct pipe_transfer **maps; - int nmaps, maxmaps; - D3DVERTEXBUFFER_DESC desc; }; static inline struct NineVertexBuffer9 * @@ -58,6 +55,12 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This, void NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This ); +/*** Nine private ***/ + +struct pipe_resource * +NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This ); + +/*** Direct3D public ***/ HRESULT WINAPI NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This, diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c index 2047b91abc4..36c594b5be3 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.c +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c @@ -174,24 +174,24 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This, const D3DVERTEXELEMENT9 *pElements ) { const D3DCAPS9 *caps; - unsigned i; - + unsigned i, nelems; DBG("This=%p pParams=%p pElements=%p\n", This, pParams, pElements); - HRESULT hr = NineUnknown_ctor(&This->base, pParams); - if (FAILED(hr)) { return hr; } - /* wine */ - for (This->nelems = 0; - pElements[This->nelems].Stream != 0xFF; - ++This->nelems) { - user_assert(pElements[This->nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL); - user_assert(!(pElements[This->nelems].Offset & 3), E_FAIL); + for (nelems = 0; + pElements[nelems].Stream != 0xFF; + ++nelems) { + user_assert(pElements[nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL); + user_assert(!(pElements[nelems].Offset & 3), E_FAIL); } - caps = NineDevice9_GetCaps(This->base.device); - user_assert(This->nelems <= caps->MaxStreams, D3DERR_INVALIDCALL); + caps = NineDevice9_GetCaps(pParams->device); + user_assert(nelems <= caps->MaxStreams, D3DERR_INVALIDCALL); + HRESULT hr = NineUnknown_ctor(&This->base, pParams); + if (FAILED(hr)) { return hr; } + + This->nelems = nelems; This->decls = CALLOC(This->nelems+1, 
sizeof(D3DVERTEXELEMENT9)); This->elems = CALLOC(This->nelems, sizeof(struct pipe_vertex_element)); This->usage_map = CALLOC(This->nelems, sizeof(uint16_t)); @@ -203,6 +203,9 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This, This->decls[i].UsageIndex); This->usage_map[i] = usage; + if (This->decls[i].Usage == D3DDECLUSAGE_POSITIONT) + This->position_t = TRUE; + This->elems[i].src_offset = This->decls[i].Offset; This->elems[i].instance_divisor = 0; This->elems[i].vertex_buffer_index = This->decls[i].Stream; diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h index 655bcfbf165..e39f259440f 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.h +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h @@ -46,6 +46,8 @@ struct NineVertexDeclaration9 D3DVERTEXELEMENT9 *decls; DWORD fvf; + + BOOL position_t; }; static inline struct NineVertexDeclaration9 * NineVertexDeclaration9( void *data ) diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 0b9005685a9..f6988923caa 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -136,7 +136,7 @@ NineVolume9_dtor( struct NineVolume9 *This ) NineVolume9_UnlockBox(This); if (This->data) - FREE(This->data); + align_free(This->data); pipe_resource_reference(&This->resource, NULL); @@ -264,6 +264,13 @@ NineVolume9_LockBox( struct NineVolume9 *This, usage |= PIPE_TRANSFER_DONTBLOCK; if (pBox) { + user_assert(pBox->Right > pBox->Left, D3DERR_INVALIDCALL); + user_assert(pBox->Bottom > pBox->Top, D3DERR_INVALIDCALL); + user_assert(pBox->Back > pBox->Front, D3DERR_INVALIDCALL); + user_assert(pBox->Right <= This->desc.Width, D3DERR_INVALIDCALL); + user_assert(pBox->Bottom <= This->desc.Height, D3DERR_INVALIDCALL); + user_assert(pBox->Back <= This->desc.Depth, D3DERR_INVALIDCALL); + d3dbox_to_pipe_box(&box, pBox); if (u_box_clip_2d(&box, 
&box, This->desc.Width, This->desc.Height) < 0) { DBG("Locked volume intersection empty.\n"); diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c index f66ed896e62..b4536828909 100644 --- a/src/gallium/state_trackers/omx/vid_dec_h264.c +++ b/src/gallium/state_trackers/omx/vid_dec_h264.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_video.h" #include "vl/vl_rbsp.h" +#include "vl/vl_zscan.h" #include "entrypoint.h" #include "vid_dec.h" @@ -205,6 +206,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si const uint8_t *defaultList, const uint8_t *fallbackList) { unsigned lastScale = 8, nextScale = 8; + const int *list; unsigned i; /* (pic|seq)_scaling_list_present_flag[i] */ @@ -214,6 +216,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si return; } + list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal; for (i = 0; i < sizeOfScalingList; ++i ) { if (nextScale != 0) { @@ -224,8 +227,8 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si return; } } - scalingList[i] = nextScale == 0 ? lastScale : nextScale; - lastScale = scalingList[i]; + scalingList[list[i]] = nextScale == 0 ? 
lastScale : nextScale; + lastScale = scalingList[list[i]]; } } diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index 5cd1ba7815c..233db8ae372 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -53,22 +53,29 @@ DRI_CONF_BEGIN DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1) DRI_CONF_SECTION_END DRI_CONF_SECTION_NINE + DRI_CONF_NINE_OVERRIDEVENDOR(-1) DRI_CONF_NINE_THROTTLE(-2) DRI_CONF_NINE_THREADSUBMIT("false") DRI_CONF_SECTION_END DRI_CONF_END; -/* define fallback value here: NVIDIA GeForce GTX 970 */ -#define FALLBACK_NAME "NV124" -#define FALLBACK_DEVID 0x13C2 -#define FALLBACK_VENID 0x10de +struct fallback_card_config { + const char *name; + unsigned vendor_id; + unsigned device_id; +} fallback_cards[] = { + {"NV124", 0x10de, 0x13C2}, /* NVIDIA GeForce GTX 970 */ + {"HAWAII", 0x1002, 0x67b1}, /* AMD Radeon R9 290 */ + {"Haswell Mobile", 0x8086, 0x13C2}, /* Intel Haswell Mobile */ + {"SVGA3D", 0x15ad, 0x0405}, /* VMware SVGA 3D */ +}; /* prototypes */ void d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid, - unsigned fallback_ven, - unsigned fallback_dev, - const char* fallback_name ); + unsigned fallback_ven, + unsigned fallback_dev, + const char* fallback_name ); void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid); @@ -118,9 +125,9 @@ get_bus_info( int fd, *subsysid = 0; *revision = 0; } else { - DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME); - *vendorid = FALLBACK_VENID; - *deviceid = FALLBACK_DEVID; + DBG("Unable to detect card. 
Faking %s\n", fallback_cards[0].name); + *vendorid = fallback_cards[0].vendor_id; + *deviceid = fallback_cards[0].device_id; *subsysid = 0; *revision = 0; } @@ -128,8 +135,10 @@ get_bus_info( int fd, static inline void read_descriptor( struct d3dadapter9_context *ctx, - int fd ) + int fd, int override_vendorid ) { + unsigned i; + BOOL found; D3DADAPTER_IDENTIFIER9 *drvid = &ctx->identifier; memset(drvid, 0, sizeof(*drvid)); @@ -140,9 +149,30 @@ read_descriptor( struct d3dadapter9_context *ctx, strncpy(drvid->Description, ctx->hal->get_name(ctx->hal), sizeof(drvid->Description)); + if (override_vendorid > 0) { + found = FALSE; + /* fill in device_id and card name for fake vendor */ + for (i = 0; i < sizeof(fallback_cards)/sizeof(fallback_cards[0]); i++) { + if (fallback_cards[i].vendor_id == override_vendorid) { + DBG("Faking card '%s' vendor 0x%04x, device 0x%04x\n", + fallback_cards[i].name, + fallback_cards[i].vendor_id, + fallback_cards[i].device_id); + drvid->VendorId = fallback_cards[i].vendor_id; + drvid->DeviceId = fallback_cards[i].device_id; + strncpy(drvid->Description, fallback_cards[i].name, + sizeof(drvid->Description)); + found = TRUE; + break; + } + } + if (!found) { + DBG("Unknown fake vendor 0x%04x! 
Using detected vendor !\n", override_vendorid); + } + } /* choose fall-back vendor if necessary to allow * the following functions to return sane results */ - d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME); + d3d_match_vendor_id(drvid, fallback_cards[0].vendor_id, fallback_cards[0].device_id, fallback_cards[0].name); /* fill in driver name and version info */ d3d_fill_driver_version(drvid); /* override Description field with Windows like names */ @@ -177,6 +207,7 @@ drm_create_adapter( int fd, driOptionCache defaultInitOptions; driOptionCache userInitOptions; int throttling_value_user = -2; + int override_vendorid = -1; if (!ctx) { return E_OUTOFMEMORY; } @@ -247,6 +278,10 @@ drm_create_adapter( int fd, "You should not expect any benefit."); } + if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) { + override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid"); + } + driDestroyOptionCache(&userInitOptions); driDestroyOptionInfo(&defaultInitOptions); @@ -260,7 +295,7 @@ drm_create_adapter( int fd, } /* read out PCI info */ - read_descriptor(&ctx->base, fd); + read_descriptor(&ctx->base, fd, override_vendorid); /* create and return new ID3DAdapter9 */ hr = NineAdapter9_new(&ctx->base, (struct NineAdapter9 **)ppAdapter); diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index d4030852943..2a7738e6979 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -94,7 +94,7 @@ gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4 endif ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),) LOCAL_CFLAGS += -DGALLIUM_VIRGL -gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl +gallium_DRIVERS += libmesa_winsys_virgl libmesa_winsys_virgl_vtest libmesa_pipe_virgl endif ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),) gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 30a1aa8d6ba..59a801b1426 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -288,16 +288,17 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, request.alloc_size = size; request.phys_alignment = alignment; - if (initial_domain & RADEON_DOMAIN_VRAM) { + if (initial_domain & RADEON_DOMAIN_VRAM) request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; - if (flags & RADEON_FLAG_CPU_ACCESS) - request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - } - if (initial_domain & RADEON_DOMAIN_GTT) { + if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; - if (flags & RADEON_FLAG_GTT_WC) - request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; - } + + if (flags & RADEON_FLAG_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (flags & RADEON_FLAG_NO_CPU_ACCESS) + request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + if (flags & RADEON_FLAG_GTT_WC) + request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); if (r) { diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 7393a1d1eb4..dab27dfba96 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -68,7 +68,6 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { case CIK__PIPE_CONFIG__ADDR_SURF_P2: - default: return 2; case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: @@ -86,23 +85,13 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: return 16; + default: + fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); + assert(!"this should never occur"); + return 2; } } 
-/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG - * into GB_TILING_CONFIG register which is only present on R600-R700. */ -static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info) -{ - unsigned num_pipes = info->gb_addr_cfg & 0x7; - unsigned num_banks = info->mc_arb_ramcfg & 0x3; - unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7; - unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3; - - return num_pipes | (num_banks << 4) | - (pipe_interleave_bytes << 8) | - (row_size << 12); -} - /* Helper function to do the ioctls needed for setup and init. */ static boolean do_winsys_init(struct amdgpu_winsys *ws) { @@ -251,20 +240,19 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) ws->info.gart_size = gtt.heap_size; ws->info.vram_size = vram.heap_size; /* convert the shader clock from KHz to MHz */ - ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000; + ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000; ws->info.max_se = ws->amdinfo.num_shader_engines; ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine; ws->info.has_uvd = uvd.available_rings != 0; ws->info.vce_fw_version = vce.available_rings ? vce_version : 0; ws->info.has_userptr = TRUE; - ws->info.r600_num_backends = ws->amdinfo.rb_pipes; - ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq; - ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo); - ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo); - ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? 
*/ - ws->info.r600_virtual_address = TRUE; - ws->info.r600_has_dma = dma.available_rings != 0; + ws->info.num_render_backends = ws->amdinfo.rb_pipes; + ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq; + ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo); + ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7); + ws->info.has_virtual_memory = TRUE; + ws->info.has_sdma = dma.available_rings != 0; /* Get the number of good compute units. */ ws->info.num_good_compute_units = 0; @@ -276,7 +264,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode, sizeof(ws->amdinfo.gb_tile_mode)); ws->info.si_tile_mode_array_valid = TRUE; - ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask; + ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask; memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode, sizeof(ws->amdinfo.gb_macro_tile_mode)); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 3ec6a065c7d..7e9ed0ca0fe 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -281,7 +281,7 @@ void radeon_bo_destroy(struct pb_buffer *_buf) if (bo->ptr) os_munmap(bo->ptr, bo->base.size); - if (rws->info.r600_virtual_address) { + if (rws->info.has_virtual_memory) { if (rws->va_unmap_working) { struct drm_radeon_gem_va va; @@ -552,7 +552,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, pipe_mutex_init(bo->map_mutex); pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base); - if (rws->info.r600_virtual_address) { + if (rws->info.has_virtual_memory) { struct drm_radeon_gem_va va; bo->va = radeon_bomgr_find_va(rws, size, alignment); @@ -834,7 +834,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws, pipe_mutex_unlock(ws->bo_handles_mutex); - if 
(ws->info.r600_virtual_address) { + if (ws->info.has_virtual_memory) { struct drm_radeon_gem_va va; bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20); @@ -966,7 +966,7 @@ done: if (stride) *stride = whandle->stride; - if (ws->info.r600_virtual_address && !bo->va) { + if (ws->info.has_virtual_memory && !bo->va) { struct drm_radeon_gem_va va; bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 085071c381c..155a13008a4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -283,7 +283,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs, * This doesn't have to be done if virtual memory is enabled, * because there is no offset patching with virtual memory. */ - if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) { + if (cs->base.ring_type != RING_DMA || cs->ws->info.has_virtual_memory) { return i; } } @@ -540,7 +540,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; - if (cs->ws->info.r600_virtual_address) { + if (cs->ws->info.has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; @@ -567,7 +567,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } - if (cs->ws->info.r600_virtual_address) { + if (cs->ws->info.has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 8a1ed3ae08c..35dc7e69dcf 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -298,10 +298,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) } /* 
Check for dma */ - ws->info.r600_has_dma = FALSE; + ws->info.has_sdma = FALSE; /* DMA is disabled on R700. There is IB corruption and hangs. */ if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) { - ws->info.r600_has_dma = TRUE; + ws->info.has_sdma = TRUE; } /* Check for UVD and VCE */ @@ -351,11 +351,11 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) /* Get max clock frequency info and convert it to MHz */ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, - &ws->info.max_sclk); - ws->info.max_sclk /= 1000; + &ws->info.max_shader_clock); + ws->info.max_shader_clock /= 1000; radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, - &ws->info.si_backend_enabled_mask); + &ws->info.enabled_rb_mask); ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); @@ -372,51 +372,72 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) return FALSE; } else if (ws->gen >= DRV_R600) { + uint32_t tiling_config = 0; + if (ws->info.drm_minor >= 9 && !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, "num backends", - &ws->info.r600_num_backends)) + &ws->info.num_render_backends)) return FALSE; /* get the GPU counter frequency, failure is not fatal */ radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, - &ws->info.r600_clock_crystal_freq); + &ws->info.clock_crystal_freq); radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, - &ws->info.r600_tiling_config); + &tiling_config); + + ws->info.r600_num_banks = + ws->info.chip_class >= EVERGREEN ? + 4 << ((tiling_config & 0xf0) >> 4) : + 4 << ((tiling_config & 0x30) >> 4); + + ws->info.pipe_interleave_bytes = + ws->info.chip_class >= EVERGREEN ? + 256 << ((tiling_config & 0xf00) >> 8) : + 256 << ((tiling_config & 0xc0) >> 6); + + if (!ws->info.pipe_interleave_bytes) + ws->info.pipe_interleave_bytes = + ws->info.chip_class >= EVERGREEN ? 
512 : 256; if (ws->info.drm_minor >= 11) { radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, - &ws->info.r600_num_tile_pipes); + &ws->info.num_tile_pipes); if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, - &ws->info.r600_backend_map)) - ws->info.r600_backend_map_valid = TRUE; + &ws->info.r600_gb_backend_map)) + ws->info.r600_gb_backend_map_valid = TRUE; + } else { + ws->info.num_tile_pipes = + ws->info.chip_class >= EVERGREEN ? + 1 << (tiling_config & 0xf) : + 1 << ((tiling_config & 0xe) >> 1); } - ws->info.r600_virtual_address = FALSE; + ws->info.has_virtual_memory = FALSE; if (ws->info.drm_minor >= 13) { uint32_t ib_vm_max_size; - ws->info.r600_virtual_address = TRUE; + ws->info.has_virtual_memory = TRUE; if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, &ws->va_start)) - ws->info.r600_virtual_address = FALSE; + ws->info.has_virtual_memory = FALSE; if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, &ib_vm_max_size)) - ws->info.r600_virtual_address = FALSE; + ws->info.has_virtual_memory = FALSE; radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, &ws->va_unmap_working); } if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE)) - ws->info.r600_virtual_address = FALSE; + ws->info.has_virtual_memory = FALSE; } /* Get max pipes, this is only needed for compute shaders. All evergreen+ * chips have at least 2 pipes, so we use 2 as a default. 
*/ - ws->info.r600_max_pipes = 2; + ws->info.r600_max_quad_pipes = 2; radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, - &ws->info.r600_max_pipes); + &ws->info.r600_max_quad_pipes); /* All GPUs have at least one compute unit */ ws->info.num_good_compute_units = 1; @@ -742,7 +763,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) ws->fd = dup(fd); if (!do_winsys_init(ws)) - goto fail; + goto fail1; pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0, MIN2(ws->info.vram_size, ws->info.gart_size), @@ -812,8 +833,9 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) return &ws->base; fail: - pipe_mutex_unlock(fd_tab_mutex); pb_cache_deinit(&ws->bo_cache); +fail1: + pipe_mutex_unlock(fd_tab_mutex); if (ws->surf_man) radeon_surface_manager_free(ws->surf_man); if (ws->fd >= 0) diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_public.h b/src/gallium/winsys/virgl/drm/virgl_drm_public.h index be01021ca9a..f70f0e50448 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_public.h +++ b/src/gallium/winsys/virgl/drm/virgl_drm_public.h @@ -23,8 +23,8 @@ #ifndef VIRGL_DRM_PUBLIC_H #define VIRGL_DRM_PUBLIC_H -struct virgl_winsys; +struct pipe_screen; -struct virgl_winsys *virgl_drm_winsys_create(int drmFD); +struct pipe_screen *virgl_drm_screen_create(int fd); #endif diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c index b5d4435e5e6..ba009882ec2 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c @@ -25,6 +25,7 @@ #include <fcntl.h> #include <stdio.h> #include <sys/ioctl.h> +#include <sys/stat.h> #include "os/os_mman.h" #include "os/os_time.h" @@ -33,6 +34,8 @@ #include "util/u_hash_table.h" #include "util/u_inlines.h" #include "state_tracker/drm_driver.h" +#include "virgl/virgl_screen.h" +#include "virgl/virgl_public.h" #include <xf86drm.h> #include "virtgpu_drm.h" @@ -50,10 +53,17 @@ static void 
virgl_hw_res_destroy(struct virgl_drm_winsys *qdws, { struct drm_gem_close args; - if (res->name) { + if (res->flinked) { + pipe_mutex_lock(qdws->bo_handles_mutex); + util_hash_table_remove(qdws->bo_names, + (void *)(uintptr_t)res->flink); + pipe_mutex_unlock(qdws->bo_handles_mutex); + } + + if (res->bo_handle) { pipe_mutex_lock(qdws->bo_handles_mutex); util_hash_table_remove(qdws->bo_handles, - (void *)(uintptr_t)res->name); + (void *)(uintptr_t)res->bo_handle); pipe_mutex_unlock(qdws->bo_handles_mutex); } @@ -109,6 +119,7 @@ virgl_drm_winsys_destroy(struct virgl_winsys *qws) virgl_cache_flush(qdws); util_hash_table_destroy(qdws->bo_handles); + util_hash_table_destroy(qdws->bo_names); pipe_mutex_destroy(qdws->bo_handles_mutex); pipe_mutex_destroy(qdws->mutex); @@ -367,11 +378,12 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, struct drm_gem_open open_arg = {}; struct drm_virtgpu_resource_info info_arg = {}; struct virgl_hw_res *res; + uint32_t handle = whandle->handle; pipe_mutex_lock(qdws->bo_handles_mutex); if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { - res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle); + res = util_hash_table_get(qdws->bo_names, (void*)(uintptr_t)handle); if (res) { struct virgl_hw_res *r = NULL; virgl_drm_resource_reference(qdws, &r, res); @@ -379,21 +391,31 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, } } - res = CALLOC_STRUCT(virgl_hw_res); - if (!res) - goto done; - if (whandle->type == DRM_API_HANDLE_TYPE_FD) { int r; - uint32_t handle; r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle); if (r) { - FREE(res); res = NULL; goto done; } + } + + res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle); + fprintf(stderr, "resource %p for handle %d, pfd=%d\n", res, handle, whandle->handle); + if (res) { + struct virgl_hw_res *r = NULL; + virgl_drm_resource_reference(qdws, &r, res); + goto done; + } + + res = CALLOC_STRUCT(virgl_hw_res); + if 
(!res) + goto done; + + if (whandle->type == DRM_API_HANDLE_TYPE_FD) { res->bo_handle = handle; } else { + fprintf(stderr, "gem open handle %d\n", handle); memset(&open_arg, 0, sizeof(open_arg)); open_arg.name = whandle->handle; if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { @@ -403,7 +425,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, } res->bo_handle = open_arg.handle; } - res->name = whandle->handle; + res->name = handle; memset(&info_arg, 0, sizeof(info_arg)); info_arg.bo_handle = res->bo_handle; @@ -422,7 +444,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, pipe_reference_init(&res->reference, 1); res->num_cs_references = 0; - util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)whandle->handle, res); + util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res); done: pipe_mutex_unlock(qdws->bo_handles_mutex); @@ -452,7 +474,7 @@ static boolean virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws, res->flink = flink.name; pipe_mutex_lock(qdws->bo_handles_mutex); - util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res); + util_hash_table_set(qdws->bo_names, (void *)(uintptr_t)res->flink, res); pipe_mutex_unlock(qdws->bo_handles_mutex); } whandle->handle = res->flink; @@ -732,7 +754,7 @@ static void virgl_fence_reference(struct virgl_winsys *vws, } -struct virgl_winsys * +static struct virgl_winsys * virgl_drm_winsys_create(int drmFD) { struct virgl_drm_winsys *qdws; @@ -748,6 +770,7 @@ virgl_drm_winsys_create(int drmFD) pipe_mutex_init(qdws->mutex); pipe_mutex_init(qdws->bo_handles_mutex); qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare); + qdws->bo_names = util_hash_table_create(handle_hash, handle_compare); qdws->base.destroy = virgl_drm_winsys_destroy; qdws->base.transfer_put = virgl_bo_transfer_put; @@ -772,3 +795,87 @@ virgl_drm_winsys_create(int drmFD) return &qdws->base; } + +static struct util_hash_table *fd_tab = NULL; 
+pipe_static_mutex(virgl_screen_mutex); + +static void +virgl_drm_screen_destroy(struct pipe_screen *pscreen) +{ + struct virgl_screen *screen = virgl_screen(pscreen); + boolean destroy; + + pipe_mutex_lock(virgl_screen_mutex); + destroy = --screen->refcnt == 0; + if (destroy) { + int fd = virgl_drm_winsys(screen->vws)->fd; + util_hash_table_remove(fd_tab, intptr_to_pointer(fd)); + } + pipe_mutex_unlock(virgl_screen_mutex); + + if (destroy) { + pscreen->destroy = screen->winsys_priv; + pscreen->destroy(pscreen); + } +} + +static unsigned hash_fd(void *key) +{ + int fd = pointer_to_intptr(key); + struct stat stat; + fstat(fd, &stat); + + return stat.st_dev ^ stat.st_ino ^ stat.st_rdev; +} + +static int compare_fd(void *key1, void *key2) +{ + int fd1 = pointer_to_intptr(key1); + int fd2 = pointer_to_intptr(key2); + struct stat stat1, stat2; + fstat(fd1, &stat1); + fstat(fd2, &stat2); + + return stat1.st_dev != stat2.st_dev || + stat1.st_ino != stat2.st_ino || + stat1.st_rdev != stat2.st_rdev; +} + +struct pipe_screen * +virgl_drm_screen_create(int fd) +{ + struct pipe_screen *pscreen = NULL; + + pipe_mutex_lock(virgl_screen_mutex); + if (!fd_tab) { + fd_tab = util_hash_table_create(hash_fd, compare_fd); + if (!fd_tab) + goto unlock; + } + + pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); + if (pscreen) { + virgl_screen(pscreen)->refcnt++; + } else { + struct virgl_winsys *vws; + int dup_fd = dup(fd); + + vws = virgl_drm_winsys_create(dup_fd); + + pscreen = virgl_create_screen(vws); + if (pscreen) { + util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen); + + /* Bit of a hack, to avoid circular linkage dependency, + * ie. 
pipe driver having to call in to winsys, we + * override the pipe drivers screen->destroy(): + */ + virgl_screen(pscreen)->winsys_priv = pscreen->destroy; + pscreen->destroy = virgl_drm_screen_destroy; + } + } + +unlock: + pipe_mutex_unlock(virgl_screen_mutex); + return pscreen; +} diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h index da85ff87d2a..ffd7658ca81 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h @@ -62,6 +62,7 @@ struct virgl_drm_winsys pipe_mutex mutex; struct util_hash_table *bo_handles; + struct util_hash_table *bo_names; pipe_mutex bo_handles_mutex; }; diff --git a/src/gallium/winsys/virgl/vtest/Android.mk b/src/gallium/winsys/virgl/vtest/Android.mk new file mode 100644 index 00000000000..3e084e44ceb --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/Android.mk @@ -0,0 +1,33 @@ +# Copyright (C) 2014 Emil Velikov <[email protected]> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_virgl_vtest + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 651915aed71..77103492a4f 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -1102,9 +1102,14 @@ dri2BindExtensions(struct dri2_screen *psc, struct glx_display * priv, __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context"); __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile"); - if ((mask & (1 << __DRI_API_GLES2)) != 0) - __glXEnableDirectExtension(&psc->base, - "GLX_EXT_create_context_es2_profile"); + if ((mask & ((1 << __DRI_API_GLES) | + (1 << __DRI_API_GLES2) | + (1 << __DRI_API_GLES3))) != 0) { + __glXEnableDirectExtension(&psc->base, + "GLX_EXT_create_context_es_profile"); + __glXEnableDirectExtension(&psc->base, + "GLX_EXT_create_context_es2_profile"); + } } for (i = 0; extensions[i]; i++) { diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index 8bdbb9caf56..6054ffc3dc1 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -665,9 +665,14 @@ dri3_bind_extensions(struct dri3_screen *psc, struct glx_display * priv, __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context"); __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile"); - if ((mask & (1 << __DRI_API_GLES2)) != 0) + if ((mask & ((1 << __DRI_API_GLES) | + (1 << __DRI_API_GLES2) | + (1 << __DRI_API_GLES3))) != 0) { + __glXEnableDirectExtension(&psc->base, + "GLX_EXT_create_context_es_profile"); __glXEnableDirectExtension(&psc->base, 
"GLX_EXT_create_context_es2_profile"); + } for (i = 0; extensions[i]; i++) { /* when on a different gpu than the server, the server pixmaps diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index 8a56385c4bd..6728d38fa0a 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -547,9 +547,18 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs, case GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB: *api = __DRI_API_OPENGL; break; - case GLX_CONTEXT_ES2_PROFILE_BIT_EXT: - *api = __DRI_API_GLES2; - break; + case GLX_CONTEXT_ES_PROFILE_BIT_EXT: + if (*major_ver >= 3) + *api = __DRI_API_GLES3; + else if (*major_ver == 2 && *minor_ver == 0) + *api = __DRI_API_GLES2; + else if (*major_ver == 1 && *minor_ver < 2) + *api = __DRI_API_GLES; + else { + *error = __DRI_CTX_ERROR_BAD_API; + return false; + } + break; default: *error = __DRI_CTX_ERROR_BAD_API; return false; @@ -580,19 +589,6 @@ dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs, return false; } - /* The GLX_EXT_create_context_es2_profile spec says: - * - * "... If the version requested is 2.0, and the - * GLX_CONTEXT_ES2_PROFILE_BIT_EXT bit is set in the - * GLX_CONTEXT_PROFILE_MASK_ARB attribute (see below), then the context - * returned will implement OpenGL ES 2.0. This is the only way in which - * an implementation may request an OpenGL ES 2.0 context." 
- */ - if (*api == __DRI_API_GLES2 && (*major_ver != 2 || *minor_ver != 0)) { - *error = __DRI_CTX_ERROR_BAD_API; - return false; - } - *error = __DRI_CTX_ERROR_SUCCESS; return true; } diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c index 76cc3214b7b..241ac7f6d2c 100644 --- a/src/glx/drisw_glx.c +++ b/src/glx/drisw_glx.c @@ -623,9 +623,11 @@ driswBindExtensions(struct drisw_screen *psc, const __DRIextension **extensions) __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context"); __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile"); - /* DRISW version >= 2 implies support for OpenGL ES 2.0. + /* DRISW version >= 2 implies support for OpenGL ES. */ __glXEnableDirectExtension(&psc->base, + "GLX_EXT_create_context_es_profile"); + __glXEnableDirectExtension(&psc->base, "GLX_EXT_create_context_es2_profile"); } diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index 3b29aef1234..22b078ce484 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -146,6 +146,7 @@ static const struct extension_info known_glx_extensions[] = { { GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N }, { GLX(EXT_framebuffer_sRGB), VER(0,0), Y, Y, N, N }, { GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N }, + { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N }, { GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N }, { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N }, { GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y }, diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h index 3a9bc823052..906b3fc16c0 100644 --- a/src/glx/glxextensions.h +++ b/src/glx/glxextensions.h @@ -45,6 +45,7 @@ enum EXT_import_context_bit, EXT_framebuffer_sRGB_bit, EXT_fbconfig_packed_float_bit, + EXT_create_context_es_profile_bit, EXT_create_context_es2_profile_bit, MESA_copy_sub_buffer_bit, MESA_depth_float_bit, diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index d7ab3bff4df..db98ac05fd9 100644 --- 
a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8247,7 +8247,14 @@ <xi:include href="ARB_multi_bind.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> -<!-- ARB extensions 148 - 153 --> +<category name="GL_ARB_query_buffer_object" number="148"> + <enum name="QUERY_RESULT_NO_WAIT" value="0x9194"/> + <enum name="QUERY_BUFFER" value="0x9192"/> + <enum name="QUERY_BUFFER_BINDING" value="0x9193"/> + <enum name="QUERY_BUFFER_BARRIER_BIT" value="0x00008000"/> +</category> + +<!-- ARB extensions 149 - 153 --> <xi:include href="ARB_indirect_parameters.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> @@ -12661,6 +12668,12 @@ <enum name="FRAMEBUFFER_SRGB_CAPABLE_EXT" value="0x8DBA"/> </category> +<category name="GL_ATI_meminfo" number="359"> + <enum name="VBO_FREE_MEMORY_ATI" value="0x87FB" /> + <enum name="TEXTURE_FREE_MEMORY_ATI" value="0x87FC" /> + <enum name="RENDERBUFFER_FREE_MEMORY_ATI" value="0x87FD" /> +</category> + <xi:include href="AMD_performance_monitor.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> <category name="GL_APPLE_texture_range" number="367"> @@ -12714,6 +12727,14 @@ <enum name="EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD" value="0x9160"/> </category> +<category name="GL_NVX_gpu_memory_info" number="438"> + <enum name="GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX" value="0x9047" /> + <enum name="GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX" value="0x9048" /> + <enum name="GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX" value="0x9049" /> + <enum name="GPU_MEMORY_INFO_EVICTION_COUNT_NVX" value="0x904A" /> + <enum name="GPU_MEMORY_INFO_EVICTED_MEMORY_NVX" value="0x904B" /> +</category> + <xi:include href="INTEL_performance_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> <category name="GL_EXT_polygon_offset_clamp" number="460"> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 7af8becd607..ffe560faa3d 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -377,6 +377,7 @@ VBO_FILES = \ 
vbo/vbo_exec_eval.c \ vbo/vbo_exec.h \ vbo/vbo.h \ + vbo/vbo_minmax_index.c \ vbo/vbo_noop.c \ vbo/vbo_noop.h \ vbo/vbo_primitive_restart.c \ @@ -393,6 +394,7 @@ VBO_FILES = \ STATETRACKER_FILES = \ state_tracker/st_atom_array.c \ + state_tracker/st_atom_atomicbuf.c \ state_tracker/st_atom_blend.c \ state_tracker/st_atom.c \ state_tracker/st_atom_clip.c \ @@ -409,6 +411,7 @@ STATETRACKER_FILES = \ state_tracker/st_atom_shader.c \ state_tracker/st_atom_shader.h \ state_tracker/st_atom_stipple.c \ + state_tracker/st_atom_storagebuf.c \ state_tracker/st_atom_tess.c \ state_tracker/st_atom_texture.c \ state_tracker/st_atom_viewport.c \ diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 55e926b239e..e5cbc465871 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -363,3 +363,8 @@ DRI_CONF_OPT_END DRI_CONF_OPT_BEGIN_B(thread_submit, def) \ DRI_CONF_DESC(en,gettext("Use an additional thread to submit buffers.")) \ DRI_CONF_OPT_END + +#define DRI_CONF_NINE_OVERRIDEVENDOR(def) \ +DRI_CONF_OPT_BEGIN(override_vendorid, int, def) \ + DRI_CONF_DESC(en,"Define the vendor_id to report. 
This allows faking another hardware vendor.") \ +DRI_CONF_OPT_END diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 0401e397031..00e44af2f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -23,7 +23,7 @@ #include "brw_compiler.h" #include "brw_context.h" -#include "nir.h" +#include "compiler/nir/nir.h" #include "main/errors.h" #include "util/debug.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1032e5a8175..44d2fe4d9e4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -167,6 +167,19 @@ intel_viewport(struct gl_context *ctx) } static void +intel_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *fb) +{ + struct brw_context *brw = brw_context(ctx); + + /* Quantize the derived default number of samples + */ + fb->DefaultGeometry._NumSamples = + intel_quantize_num_samples(brw->intelScreen, + fb->DefaultGeometry.NumSamples); +} + +static void intel_update_state(struct gl_context * ctx, GLuint new_state) { struct brw_context *brw = brw_context(ctx); @@ -245,6 +258,12 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) } _mesa_lock_context_textures(ctx); + + if (new_state & _NEW_BUFFERS) { + intel_update_framebuffer(ctx, ctx->DrawBuffer); + if (ctx->DrawBuffer != ctx->ReadBuffer) + intel_update_framebuffer(ctx, ctx->ReadBuffer); + } } #define flushFront(screen) ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index 994c699bb5a..d7a1456bce0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -268,7 +268,7 @@ fs_visitor::opt_combine_constants() qsort(table.imm, table.len, sizeof(struct imm), compare); /* Insert MOVs to load the constant values into GRFs. */ - fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8)); + fs_reg reg(VGRF, alloc.allocate(1)); reg.stride = 0; for (int i = 0; i < table.len; i++) { struct imm *imm = &table.imm[i]; @@ -284,8 +284,8 @@ fs_visitor::opt_combine_constants() imm->subreg_offset = reg.subreg_offset; reg.subreg_offset += sizeof(float); - if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) { - reg.nr = alloc.allocate(dispatch_width / 8); + if ((unsigned)reg.subreg_offset == 8 * sizeof(float)) { + reg.nr = alloc.allocate(1); reg.subreg_offset = 0; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6c3a8d70677..cd7f3fe851a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1144,16 +1144,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->predicate = BRW_PREDICATE_NORMAL; break; - case nir_op_extract_ubyte: - case nir_op_extract_ibyte: { + case nir_op_extract_u8: + case nir_op_extract_i8: { nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); bld.emit(SHADER_OPCODE_EXTRACT_BYTE, result, op[0], brw_imm_ud(byte->u[0])); break; } - case nir_op_extract_uword: - case nir_op_extract_iword: { + case nir_op_extract_u16: + case nir_op_extract_i16: { nir_const_value *word = nir_src_as_const_value(instr->src[1].src); bld.emit(SHADER_OPCODE_EXTRACT_WORD, result, op[0], brw_imm_ud(word->u[0])); diff --git 
a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 6b9bfcf0b85..c1690ad45c3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -939,7 +939,7 @@ fs_visitor::emit_barrier() /* Clear the message payload */ pbld.MOV(payload, brw_imm_ud(0u)); - /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ + /* Copy the barrier id from r0.2 to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index c6f0b0d8a2a..6bd992882b8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -254,8 +254,8 @@ try_constant_propagate(const struct brw_device_info *devinfo, static bool try_copy_propagate(const struct brw_device_info *devinfo, - vec4_instruction *inst, - int arg, struct copy_entry *entry) + vec4_instruction *inst, int arg, + struct copy_entry *entry, int attributes_per_reg) { /* Build up the value we are propagating as if it were the source of a * single MOV @@ -320,7 +320,8 @@ try_copy_propagate(const struct brw_device_info *devinfo, unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle, value.swizzle); if (inst->is_3src() && - value.file == UNIFORM && + (value.file == UNIFORM || + (value.file == ATTR && attributes_per_reg != 1)) && !brw_is_single_value_swizzle(composed_swizzle)) return false; @@ -395,6 +396,11 @@ try_copy_propagate(const struct brw_device_info *devinfo, bool vec4_visitor::opt_copy_propagation(bool do_constant_prop) { + /* If we are in dual instanced or single mode, then attributes are going + * to be interleaved, so one register contains two attribute slots. 
+ */ + const int attributes_per_reg = + prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; bool progress = false; struct copy_entry entries[alloc.total_size]; @@ -465,7 +471,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry)) progress = true; - if (try_copy_propagate(devinfo, inst, i, &entry)) + if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg)) progress = true; } diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 904950dfa07..0df25d2557c 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -210,7 +210,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, { const unsigned depth = max_layer - min_layer; struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = 0; + uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; @@ -425,7 +425,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = 0; + uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; unsigned width = mt->logical_width0; unsigned height = mt->logical_height0; unsigned pitch = mt->pitch; diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 8ede1f06e4e..de1aba44c1b 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -32,6 +32,7 @@ #include <stdbool.h> #include <inttypes.h> /* for PRId64 macro */ +#include "util/debug.h" #include "glheader.h" #include "enums.h" #include "hash.h" @@ -120,6 +121,10 @@ get_buffer_target(struct gl_context *ctx, GLenum target) return &ctx->CopyReadBuffer; case GL_COPY_WRITE_BUFFER: return &ctx->CopyWriteBuffer; + case GL_QUERY_BUFFER: + if (_mesa_has_ARB_query_buffer_object(ctx)) + return &ctx->QueryBuffer; + break; case GL_DRAW_INDIRECT_BUFFER: if ((ctx->API == API_OPENGL_CORE && ctx->Extensions.ARB_draw_indirect) || @@ -458,6 +463,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx, { (void) ctx; + vbo_delete_minmax_cache(bufObj); _mesa_align_free(bufObj->Data); /* assign strange values here to help w/ debugging */ @@ -520,6 +526,24 @@ _mesa_reference_buffer_object_(struct gl_context *ctx, /** + * Get the value of MESA_NO_MINMAX_CACHE. + */ +static bool +get_no_minmax_cache() +{ + static bool read = false; + static bool disable = false; + + if (!read) { + disable = env_var_as_boolean("MESA_NO_MINMAX_CACHE", false); + read = true; + } + + return disable; +} + + +/** * Initialize a buffer object to default values. 
*/ void @@ -532,6 +556,9 @@ _mesa_initialize_buffer_object(struct gl_context *ctx, obj->RefCount = 1; obj->Name = name; obj->Usage = GL_STATIC_DRAW_ARB; + + if (get_no_minmax_cache()) + obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE; } @@ -877,6 +904,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, ctx->Shared->NullBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer, + ctx->Shared->NullBufferObj); + for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->UniformBufferBindings[i].BufferObject, @@ -925,6 +955,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL); + _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer, NULL); + for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->UniformBufferBindings[i].BufferObject, @@ -1014,6 +1046,15 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer) return; } + /* record usage history */ + switch (target) { + case GL_PIXEL_PACK_BUFFER: + newBufObj->UsageHistory |= USAGE_PIXEL_PACK_BUFFER; + break; + default: + break; + } + /* bind new buffer */ _mesa_reference_buffer_object(ctx, bindTarget, newBufObj); } @@ -1348,6 +1389,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids) _mesa_BindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); } + /* unbind query buffer binding point */ + if (ctx->QueryBuffer == bufObj) { + _mesa_BindBuffer(GL_QUERY_BUFFER, 0); + } + /* The ID is immediately freed for re-use */ _mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]); /* Make sure we do not run into the classic ABA problem on bind. 
@@ -1519,6 +1565,7 @@ _mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj, bufObj->Written = GL_TRUE; bufObj->Immutable = GL_TRUE; + bufObj->MinMaxCacheDirty = true; assert(ctx->Driver.BufferData); if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW, @@ -1632,6 +1679,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT); bufObj->Written = GL_TRUE; + bufObj->MinMaxCacheDirty = true; #ifdef VBO_DEBUG printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n", @@ -1744,6 +1792,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, } bufObj->Written = GL_TRUE; + bufObj->MinMaxCacheDirty = true; assert(ctx->Driver.BufferSubData); ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj); @@ -1859,12 +1908,16 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx, return; } + /* Bail early. Negative size has already been checked. */ + if (size == 0) + return; + + bufObj->MinMaxCacheDirty = true; + if (data == NULL) { /* clear to zeros, per the spec */ - if (size > 0) { - ctx->Driver.ClearBufferSubData(ctx, offset, size, - NULL, clearValueSize, bufObj); - } + ctx->Driver.ClearBufferSubData(ctx, offset, size, + NULL, clearValueSize, bufObj); return; } @@ -1873,10 +1926,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx, return; } - if (size > 0) { - ctx->Driver.ClearBufferSubData(ctx, offset, size, - clearValue, clearValueSize, bufObj); - } + ctx->Driver.ClearBufferSubData(ctx, offset, size, + clearValue, clearValueSize, bufObj); } void GLAPIENTRY @@ -2276,6 +2327,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx, } } + dst->MinMaxCacheDirty = true; + ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size); } @@ -2480,8 +2533,10 @@ _mesa_map_buffer_range(struct gl_context *ctx, assert(bufObj->Mappings[MAP_USER].AccessFlags == access); } - if (access & GL_MAP_WRITE_BIT) + if (access & GL_MAP_WRITE_BIT) { 
bufObj->Written = GL_TRUE; + bufObj->MinMaxCacheDirty = true; + } #ifdef VBO_DEBUG if (strstr(func, "Range") == NULL) { /* If not MapRange */ diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index d4378e51159..19ef3042548 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -48,6 +48,7 @@ struct gl_shader; struct gl_shader_program; struct gl_texture_image; struct gl_texture_object; +struct gl_memory_info; /* GL_ARB_vertex_buffer_object */ /* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return @@ -726,6 +727,15 @@ struct dd_function_table { void (*EndQuery)(struct gl_context *ctx, struct gl_query_object *q); void (*CheckQuery)(struct gl_context *ctx, struct gl_query_object *q); void (*WaitQuery)(struct gl_context *ctx, struct gl_query_object *q); + /* + * \pname the value requested to be written (GL_QUERY_RESULT, etc) + * \ptype the type of the value requested to be written: + * GL_UNSIGNED_INT, GL_UNSIGNED_INT64_ARB, + * GL_INT, GL_INT64_ARB + */ + void (*StoreQueryResult)(struct gl_context *ctx, struct gl_query_object *q, + struct gl_buffer_object *buf, intptr_t offset, + GLenum pname, GLenum ptype); /*@}*/ /** @@ -939,6 +949,13 @@ struct dd_function_table { void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups); void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect); /*@}*/ + + /** + * Query information about memory. Device memory is e.g. VRAM. Staging + * memory is e.g. GART. All sizes are in kilobytes. 
+ */ + void (*QueryMemoryInfo)(struct gl_context *ctx, + struct gl_memory_info *info); }; diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 11f4482f8d2..ded6f2c06dc 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -88,6 +88,7 @@ EXT(ARB_point_parameters , EXT_point_parameters EXT(ARB_point_sprite , ARB_point_sprite , GLL, GLC, x , x , 2003) EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012) EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009) +EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013) EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010) EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009) EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009) @@ -165,6 +166,7 @@ EXT(ARB_window_pos , dummy_true EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002) EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001) +EXT(ATI_meminfo , ATI_meminfo , GLL, GLC, x , x , 2009) EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006) EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004) EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002) @@ -291,6 +293,7 @@ EXT(NV_texture_barrier , NV_texture_barrier EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999) EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000) EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010) +EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013) EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. 
*/ EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010) @@ -311,6 +314,7 @@ EXT(OES_element_index_uint , dummy_true EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002) EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005) +EXT(OES_geometry_point_size , OES_geometry_shader , x , x , x , 31, 2015) EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015) EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008) EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 3be216da234..2d4acb35bd6 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1414,6 +1414,9 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname); } + + invalidate_framebuffer(fb); + ctx->NewState |= _NEW_BUFFERS; } void GLAPIENTRY diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py index 799b14f0b1c..a29f20754a8 100755 --- a/src/mesa/main/format_parser.py +++ b/src/mesa/main/format_parser.py @@ -532,7 +532,7 @@ def _parse_channels(fields, layout, colorspace, swizzle): return channels def parse(filename): - """Parse a format descrition in CSV format. + """Parse a format description in CSV format. This function parses the given CSV file and returns an iterable of channels.""" diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index bfc8a0836e7..fa434d447ae 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -26,7 +26,7 @@ #ifndef FRAMEBUFFER_H #define FRAMEBUFFER_H -#include "glheader.h" +#include "mtypes.h" struct gl_config; struct gl_context; @@ -97,7 +97,8 @@ static inline GLuint _mesa_geometric_samples(const struct gl_framebuffer *buffer) { return buffer->_HasAttachments ? 
- buffer->Visual.samples : buffer->DefaultGeometry.NumSamples; + buffer->Visual.samples : + buffer->DefaultGeometry._NumSamples; } static inline GLuint diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 95cb18c8ee8..8453a922549 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -147,11 +147,14 @@ enum value_extra { EXTRA_VALID_CLIP_DISTANCE, EXTRA_FLUSH_CURRENT, EXTRA_GLSL_130, - EXTRA_EXT_UBO_GS4, - EXTRA_EXT_ATOMICS_GS4, - EXTRA_EXT_SHADER_IMAGE_GS4, + EXTRA_EXT_UBO_GS, + EXTRA_EXT_ATOMICS_GS, + EXTRA_EXT_SHADER_IMAGE_GS, EXTRA_EXT_ATOMICS_TESS, EXTRA_EXT_SHADER_IMAGE_TESS, + EXTRA_EXT_SSBO_GS, + EXTRA_EXT_FB_NO_ATTACH_GS, + EXTRA_EXT_ES_GS, }; #define NO_EXTRA NULL @@ -308,7 +311,7 @@ static const int extra_ARB_transform_feedback2_api_es3[] = { }; static const int extra_ARB_uniform_buffer_object_and_geometry_shader[] = { - EXTRA_EXT_UBO_GS4, + EXTRA_EXT_UBO_GS, EXTRA_END }; @@ -343,12 +346,12 @@ static const int extra_EXT_texture_array_es3[] = { }; static const int extra_ARB_shader_atomic_counters_and_geometry_shader[] = { - EXTRA_EXT_ATOMICS_GS4, + EXTRA_EXT_ATOMICS_GS, EXTRA_END }; static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = { - EXTRA_EXT_SHADER_IMAGE_GS4, + EXTRA_EXT_SHADER_IMAGE_GS, EXTRA_END }; @@ -375,6 +378,28 @@ static const int extra_ARB_shader_storage_buffer_object_es31[] = { EXTRA_END }; +static const int extra_ARB_shader_storage_buffer_object_and_geometry_shader[] = { + EXTRA_EXT_SSBO_GS, + EXTRA_END +}; + +static const int extra_ARB_framebuffer_no_attachments_and_geometry_shader[] = { + EXTRA_EXT_FB_NO_ATTACH_GS, + EXTRA_END +}; + +static const int extra_ARB_viewport_array_or_oes_geometry_shader[] = { + EXT(ARB_viewport_array), + EXTRA_EXT_ES_GS, + EXTRA_END +}; + +static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = { + EXT(ARB_gpu_shader5), + EXTRA_EXT_ES_GS, + EXTRA_END +}; + EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); @@ -414,6 +439,7 
@@ EXTRA_EXT(ARB_shader_image_load_store); EXTRA_EXT(ARB_viewport_array); EXTRA_EXT(ARB_compute_shader); EXTRA_EXT(ARB_gpu_shader5); +EXTRA_EXT(ARB_query_buffer_object); EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5); EXTRA_EXT(INTEL_performance_query); EXTRA_EXT(ARB_explicit_uniform_location); @@ -424,6 +450,8 @@ EXTRA_EXT(ARB_tessellation_shader); EXTRA_EXT(ARB_shader_subroutine); EXTRA_EXT(ARB_shader_storage_buffer_object); EXTRA_EXT(ARB_indirect_parameters); +EXTRA_EXT(ATI_meminfo); +EXTRA_EXT(NVX_gpu_memory_info); static const int extra_ARB_color_buffer_float_or_glcore[] = { @@ -455,6 +483,12 @@ static const int extra_gl32_es3[] = { EXTRA_END, }; +static const int extra_version_32_OES_geometry_shader[] = { + EXTRA_VERSION_32, + EXTRA_EXT_ES_GS, + EXTRA_END +}; + static const int extra_gl40_ARB_sample_shading[] = { EXTRA_VERSION_40, EXT(ARB_sample_shading), @@ -1006,6 +1040,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu case GL_SHADER_STORAGE_BUFFER_BINDING: v->value_int = ctx->ShaderStorageBuffer->Name; break; + /* GL_ARB_query_buffer_object */ + case GL_QUERY_BUFFER_BINDING: + v->value_int = ctx->QueryBuffer->Name; + break; /* GL_ARB_timer_query */ case GL_TIMESTAMP: if (ctx->Driver.GetTimestamp) { @@ -1049,6 +1087,60 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu case GL_DISPATCH_INDIRECT_BUFFER_BINDING: v->value_int = ctx->DispatchIndirectBuffer->Name; break; + /* GL_ARB_multisample */ + case GL_SAMPLES: + v->value_int = _mesa_geometric_samples(ctx->DrawBuffer); + break; + case GL_SAMPLE_BUFFERS: + v->value_int = _mesa_geometric_samples(ctx->DrawBuffer) > 0; + break; + /* GL_ATI_meminfo & GL_NVX_gpu_memory_info */ + case GL_VBO_FREE_MEMORY_ATI: + case GL_TEXTURE_FREE_MEMORY_ATI: + case GL_RENDERBUFFER_FREE_MEMORY_ATI: + case GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX: + case GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX: + case GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX: + 
case GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX: + case GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX: + { + struct gl_memory_info info; + + ctx->Driver.QueryMemoryInfo(ctx, &info); + + if (d->pname == GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX) + v->value_int = info.total_device_memory; + else if (d->pname == GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX) + v->value_int = info.total_device_memory + + info.total_staging_memory; + else if (d->pname == GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX) + v->value_int = info.avail_device_memory; + else if (d->pname == GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX) + v->value_int = info.nr_device_memory_evictions; + else if (d->pname == GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX) + v->value_int = info.device_memory_evicted; + else { + /* ATI free memory enums. + * + * Since the GPU memory is (usually) page-table based, every two + * consecutive elements are equal. From the GL_ATI_meminfo + * specification: + * + * "param[0] - total memory free in the pool + * param[1] - largest available free block in the pool + * param[2] - total auxiliary memory free + * param[3] - largest auxiliary free block" + * + * All three (VBO, TEXTURE, RENDERBUFFER) queries return + * the same numbers here. 
+ */ + v->value_int_4[0] = info.avail_device_memory; + v->value_int_4[1] = info.avail_device_memory; + v->value_int_4[2] = info.avail_staging_memory; + v->value_int_4[3] = info.avail_staging_memory; + } + } + break; } } @@ -1154,20 +1246,23 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d if (ctx->Const.GLSLVersion >= 130) api_found = GL_TRUE; break; - case EXTRA_EXT_UBO_GS4: + case EXTRA_EXT_UBO_GS: api_check = GL_TRUE; - api_found = (ctx->Extensions.ARB_uniform_buffer_object && - _mesa_has_geometry_shaders(ctx)); + if (ctx->Extensions.ARB_uniform_buffer_object && + _mesa_has_geometry_shaders(ctx)) + api_found = GL_TRUE; break; - case EXTRA_EXT_ATOMICS_GS4: + case EXTRA_EXT_ATOMICS_GS: api_check = GL_TRUE; - api_found = (ctx->Extensions.ARB_shader_atomic_counters && - _mesa_has_geometry_shaders(ctx)); + if (ctx->Extensions.ARB_shader_atomic_counters && + _mesa_has_geometry_shaders(ctx)) + api_found = GL_TRUE; break; - case EXTRA_EXT_SHADER_IMAGE_GS4: + case EXTRA_EXT_SHADER_IMAGE_GS: api_check = GL_TRUE; - api_found = (ctx->Extensions.ARB_shader_image_load_store && - _mesa_has_geometry_shaders(ctx)); + if (ctx->Extensions.ARB_shader_image_load_store && + _mesa_has_geometry_shaders(ctx)) + api_found = GL_TRUE; break; case EXTRA_EXT_ATOMICS_TESS: api_check = GL_TRUE; @@ -1179,6 +1274,24 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d api_found = ctx->Extensions.ARB_shader_image_load_store && _mesa_has_tessellation(ctx); break; + case EXTRA_EXT_SSBO_GS: + api_check = GL_TRUE; + if (ctx->Extensions.ARB_shader_storage_buffer_object && + _mesa_has_geometry_shaders(ctx)) + api_found = GL_TRUE; + break; + case EXTRA_EXT_FB_NO_ATTACH_GS: + api_check = GL_TRUE; + if (ctx->Extensions.ARB_framebuffer_no_attachments && + (_mesa_is_desktop_gl(ctx) || + _mesa_has_OES_geometry_shader(ctx))) + api_found = GL_TRUE; + break; + case EXTRA_EXT_ES_GS: + api_check = GL_TRUE; + if (_mesa_has_OES_geometry_shader(ctx)) 
+ api_found = GL_TRUE; + break; case EXTRA_END: break; default: /* *e is a offset into the extension struct */ diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index af7a8f4a906..164095c103c 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -80,8 +80,8 @@ descriptor=[ [ "SAMPLE_COVERAGE_ARB", "CONTEXT_BOOL(Multisample.SampleCoverage), NO_EXTRA" ], [ "SAMPLE_COVERAGE_VALUE_ARB", "CONTEXT_FLOAT(Multisample.SampleCoverageValue), NO_EXTRA" ], [ "SAMPLE_COVERAGE_INVERT_ARB", "CONTEXT_BOOL(Multisample.SampleCoverageInvert), NO_EXTRA" ], - [ "SAMPLE_BUFFERS_ARB", "BUFFER_INT(Visual.sampleBuffers), extra_new_buffers" ], - [ "SAMPLES_ARB", "BUFFER_INT(Visual.samples), extra_new_buffers" ], + [ "SAMPLE_BUFFERS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ], + [ "SAMPLES_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ], # GL_ARB_sample_shading [ "SAMPLE_SHADING_ARB", "CONTEXT_BOOL(Multisample.SampleShading), extra_gl40_ARB_sample_shading" ], @@ -470,6 +470,9 @@ descriptor=[ ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"], ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"], +# GL_ARB_framebuffer_no_attachments / geometry shader + [ "MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments_and_geometry_shader" ], + # GL_ARB_explicit_uniform_location / GLES 3.1 [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ], @@ -499,6 +502,34 @@ descriptor=[ { "apis": ["GL_CORE", "GLES31"], "params": [ # GL_ARB_draw_indirect / GLES 3.1 [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ], + +# GL 3.2 / GL OES_geometry_shader + [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), 
extra_version_32_OES_geometry_shader" ], + [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32_OES_geometry_shader" ], + [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32_OES_geometry_shader" ], + [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32_OES_geometry_shader" ], + [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32_OES_geometry_shader" ], + [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32_OES_geometry_shader" ], + +# GL_ARB_shader_image_load_store / geometry shader + [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader" ], + +# GL_ARB_shader_atomic_counters / geometry shader + [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader " ], + [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ], + +# GL_ARB_shader_storage_buffer_object / geometry shader + [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ], + +# GL_ARB_uniform_buffer_object / geometry shader + [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ], + [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ], + +# 
GL_ARB_viewport_array / GL_OES_geometry_shader + [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array_or_oes_geometry_shader" ], + +# GL_ARB_gpu_shader5 / GL_OES_geometry_shader + [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5_or_oes_geometry_shader" ], ]}, # Remaining enums are only in OpenGL @@ -790,21 +821,10 @@ descriptor=[ # GL 3.2 [ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ], - [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ], - [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ], - [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32" ], - [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32" ], - [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32" ], - [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32" ], # GL_ARB_robustness [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ], - -# GL_ARB_uniform_buffer_object - [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ], - [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ], - # GL_ARB_timer_query [ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ], @@ -817,25 +837,31 @@ descriptor=[ # GL_ARB_texture_gather [ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), 
extra_ARB_texture_gather"], -# GL_ARB_shader_atomic_counters - [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ], - [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ], - # GL_ARB_shader_image_load_store [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ], [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ], - [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"], - -# GL_ARB_framebuffer_no_attachments - ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"], # GL_EXT_polygon_offset_clamp [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], # GL_ARB_shader_storage_buffer_object - [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ], [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ], [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ], + +# GL_ARB_query_buffer_object + [ "QUERY_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_query_buffer_object" ], + +# GL_ATI_meminfo + [ "VBO_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ], + [ "TEXTURE_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ], + [ "RENDERBUFFER_FREE_MEMORY_ATI", "LOC_CUSTOM, 
TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ], + +# GL_NVX_gpu_memory_info + [ "GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], + [ "GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], + [ "GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], + [ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], + [ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ], ]}, # Enums restricted to OpenGL Core profile @@ -847,7 +873,6 @@ descriptor=[ [ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ], [ "VIEWPORT_SUBPIXEL_BITS", "CONTEXT_INT(Const.ViewportSubpixelBits), extra_ARB_viewport_array" ], [ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ], - [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ], [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ], # GL_ARB_gpu_shader5 diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index 315b5d64004..ab1b9e907ae 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -496,14 +496,12 @@ _mesa_HashFindFreeKeyBlock(struct _mesa_HashTable *table, GLuint numKeys) GLuint _mesa_HashNumEntries(const struct _mesa_HashTable *table) { - struct hash_entry *entry; GLuint count = 0; if (table->deleted_key_data) count++; - hash_table_foreach(table->ht, entry) - count++; + count += _mesa_hash_table_num_entries(table->ht); return count; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 56dce2d1b81..a66b56c62bf 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1253,6 +1253,9 @@ typedef enum { USAGE_TEXTURE_BUFFER = 0x2, USAGE_ATOMIC_COUNTER_BUFFER = 
0x4, USAGE_SHADER_STORAGE_BUFFER = 0x8, + USAGE_TRANSFORM_FEEDBACK_BUFFER = 0x10, + USAGE_PIXEL_PACK_BUFFER = 0x20, + USAGE_DISABLE_MINMAX_CACHE = 0x40, } gl_buffer_usage; @@ -1280,6 +1283,12 @@ struct gl_buffer_object GLuint NumMapBufferWriteCalls; struct gl_buffer_mapping Mappings[MAP_COUNT]; + + /** Memoization of min/max index computations for static index buffers */ + struct hash_table *MinMaxCache; + unsigned MinMaxCacheHitIndices; + unsigned MinMaxCacheMissIndices; + bool MinMaxCacheDirty; }; @@ -1861,6 +1870,8 @@ typedef enum PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */ PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */ PROGRAM_UNDEFINED, /**< Invalid/TBD value */ + PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */ + PROGRAM_BUFFER, /**< for shader buffers, compile-time only */ PROGRAM_FILE_MAX } gl_register_file; @@ -3217,6 +3228,10 @@ struct gl_framebuffer struct { GLuint Width, Height, Layers, NumSamples; GLboolean FixedSampleLocations; + /* Derived from NumSamples by the driver so that it can choose a valid + * value for the hardware. 
+ */ + GLuint _NumSamples; } DefaultGeometry; /** \name Drawing bounds (Intersection of buffer size and scissor box) @@ -3785,6 +3800,7 @@ struct gl_extensions GLboolean ARB_occlusion_query2; GLboolean ARB_pipeline_statistics_query; GLboolean ARB_point_sprite; + GLboolean ARB_query_buffer_object; GLboolean ARB_sample_shading; GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_atomic_counters; @@ -3880,6 +3896,7 @@ struct gl_extensions GLboolean AMD_vertex_shader_layer; GLboolean AMD_vertex_shader_viewport_index; GLboolean APPLE_object_purgeable; + GLboolean ATI_meminfo; GLboolean ATI_texture_compression_3dc; GLboolean ATI_texture_mirror_once; GLboolean ATI_texture_env_combine3; @@ -3900,6 +3917,7 @@ struct gl_extensions GLboolean NV_texture_env_combine4; GLboolean NV_texture_rectangle; GLboolean NV_vdpau_interop; + GLboolean NVX_gpu_memory_info; GLboolean TDFX_texture_compression_FXT1; GLboolean OES_EGL_image; GLboolean OES_draw_texture; @@ -4434,6 +4452,8 @@ struct gl_context struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */ struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */ + struct gl_buffer_object *QueryBuffer; /**< GL_ARB_query_buffer_object */ + /** * Current GL_ARB_uniform_buffer_object binding referenced by * GL_UNIFORM_BUFFER target for glBufferData, glMapBuffer, etc. @@ -4576,6 +4596,18 @@ struct gl_context GLboolean ShareGroupReset; }; +/** + * Information about memory usage. All sizes are in kilobytes. + */ +struct gl_memory_info +{ + unsigned total_device_memory; /**< size of device memory, e.g. VRAM */ + unsigned avail_device_memory; /**< free device memory at the moment */ + unsigned total_staging_memory; /**< size of staging memory, e.g. 
GART */ + unsigned avail_staging_memory; /**< free staging memory at the moment */ + unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */ + unsigned nr_device_memory_evictions; /**< # of evictions (monotonic counter) */ +}; #ifdef DEBUG extern int MESA_VERBOSE; diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index 41f370ce485..b622d6a2979 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -288,16 +288,18 @@ void GLAPIENTRY _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + struct gl_sync_object *syncObj; const char *callerstr; char **labelPtr; + syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true); + if (_mesa_is_desktop_gl(ctx)) callerstr = "glObjectPtrLabel"; else callerstr = "glObjectPtrLabelKHR"; - if (!_mesa_validate_sync(ctx, syncObj)) { + if (!syncObj) { _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", callerstr); return; @@ -306,6 +308,7 @@ _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label) labelPtr = &syncObj->Label; set_label(ctx, labelPtr, label, length, callerstr); + _mesa_unref_sync_object(ctx, syncObj, 1); } void GLAPIENTRY @@ -313,7 +316,7 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + struct gl_sync_object *syncObj; const char *callerstr; char **labelPtr; @@ -328,7 +331,8 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length, return; } - if (!_mesa_validate_sync(ctx, syncObj)) { + syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true); + if (!syncObj) { _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", callerstr); return; @@ -337,4 +341,5 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length, 
labelPtr = &syncObj->Label; copy_label(*labelPtr, label, length, bufSize); + _mesa_unref_sync_object(ctx, syncObj, 1); } diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c index 98366857f62..b86692a5f7e 100644 --- a/src/mesa/main/queryobj.c +++ b/src/mesa/main/queryobj.c @@ -23,6 +23,7 @@ */ +#include "bufferobj.h" #include "glheader.h" #include "context.h" #include "enums.h" @@ -732,14 +733,16 @@ _mesa_GetQueryiv(GLenum target, GLenum pname, GLint *params) _mesa_GetQueryIndexediv(target, 0, pname, params); } -void GLAPIENTRY -_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params) +static void +get_query_object(struct gl_context *ctx, const char *func, + GLuint id, GLenum pname, GLenum ptype, + struct gl_buffer_object *buf, intptr_t offset) { struct gl_query_object *q = NULL; - GET_CURRENT_CONTEXT(ctx); + uint64_t value; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id, + _mesa_debug(ctx, "%s(%u, %s)\n", func, id, _mesa_enum_to_string(pname)); if (id) @@ -747,96 +750,114 @@ _mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params) if (!q || q->Active || !q->EverBound) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetQueryObjectivARB(id=%d is invalid or active)", id); + "%s(id=%d is invalid or active)", func, id); return; } - switch (pname) { - case GL_QUERY_RESULT_ARB: - if (!q->Ready) - ctx->Driver.WaitQuery(ctx, q); - /* if result is too large for returned type, clamp to max value */ - if (q->Target == GL_ANY_SAMPLES_PASSED - || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) { - if (q->Result) - *params = GL_TRUE; - else - *params = GL_FALSE; - } else { - if (q->Result > 0x7fffffff) { - *params = 0x7fffffff; - } - else { - *params = (GLint)q->Result; - } - } - break; - case GL_QUERY_RESULT_AVAILABLE_ARB: - if (!q->Ready) - ctx->Driver.CheckQuery( ctx, q ); - *params = q->Ready; - break; + if (buf && buf != ctx->Shared->NullBufferObj) { + bool is_64bit = ptype == GL_INT64_ARB || + ptype == 
GL_UNSIGNED_INT64_ARB; + if (!ctx->Extensions.ARB_query_buffer_object) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func); + return; + } + if (buf->Size < offset + 4 * (is_64bit ? 2 : 1)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(out of bounds)", func); + return; + } + + switch (pname) { + case GL_QUERY_RESULT: + case GL_QUERY_RESULT_NO_WAIT: + case GL_QUERY_RESULT_AVAILABLE: case GL_QUERY_TARGET: - *params = q->Target; - break; - default: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectivARB(pname)"); + ctx->Driver.StoreQueryResult(ctx, q, buf, offset, pname, ptype); return; + } + + /* fall through to get error below */ } -} + switch (pname) { + case GL_QUERY_RESULT: + if (!q->Ready) + ctx->Driver.WaitQuery(ctx, q); + value = q->Result; + break; + case GL_QUERY_RESULT_NO_WAIT: + if (!ctx->Extensions.ARB_query_buffer_object) + goto invalid_enum; + ctx->Driver.CheckQuery(ctx, q); + if (!q->Ready) + return; + value = q->Result; + break; + case GL_QUERY_RESULT_AVAILABLE: + if (!q->Ready) + ctx->Driver.CheckQuery(ctx, q); + value = q->Ready; + break; + case GL_QUERY_TARGET: + value = q->Target; + break; + default: +invalid_enum: + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", + func, _mesa_enum_to_string(pname)); + return; + } + + /* TODO: Have the driver be required to handle this fixup. 
*/ + if (q->Target == GL_ANY_SAMPLES_PASSED || + q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) + value = !!value; + + switch (ptype) { + case GL_INT: { + GLint *param = (GLint *)offset; + if (value > 0x7fffffff) + *param = 0x7fffffff; + else + *param = value; + break; + } + case GL_UNSIGNED_INT: { + GLuint *param = (GLuint *)offset; + if (value > 0xffffffff) + *param = 0xffffffff; + else + *param = value; + break; + } + case GL_INT64_ARB: + case GL_UNSIGNED_INT64_ARB: { + GLuint64EXT *param = (GLuint64EXT *)offset; + *param = value; + break; + } + default: + unreachable("unexpected ptype"); + } +} void GLAPIENTRY -_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params) +_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params) { - struct gl_query_object *q = NULL; GET_CURRENT_CONTEXT(ctx); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id, - _mesa_enum_to_string(pname)); + get_query_object(ctx, "glGetQueryObjectiv", + id, pname, GL_INT, ctx->QueryBuffer, (intptr_t)params); +} - if (id) - q = _mesa_lookup_query_object(ctx, id); - if (!q || q->Active || !q->EverBound) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetQueryObjectuivARB(id=%d is invalid or active)", id); - return; - } +void GLAPIENTRY +_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params) +{ + GET_CURRENT_CONTEXT(ctx); - switch (pname) { - case GL_QUERY_RESULT_ARB: - if (!q->Ready) - ctx->Driver.WaitQuery(ctx, q); - /* if result is too large for returned type, clamp to max value */ - if (q->Target == GL_ANY_SAMPLES_PASSED - || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) { - if (q->Result) - *params = GL_TRUE; - else - *params = GL_FALSE; - } else { - if (q->Result > 0xffffffff) { - *params = 0xffffffff; - } - else { - *params = (GLuint)q->Result; - } - } - break; - case GL_QUERY_RESULT_AVAILABLE_ARB: - if (!q->Ready) - ctx->Driver.CheckQuery( ctx, q ); - *params = q->Ready; - break; - case GL_QUERY_TARGET: - *params = q->Target; - 
break; - default: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectuivARB(pname)"); - return; - } + get_query_object(ctx, "glGetQueryObjectuiv", + id, pname, GL_UNSIGNED_INT, + ctx->QueryBuffer, (intptr_t)params); } @@ -846,40 +867,11 @@ _mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params) void GLAPIENTRY _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params) { - struct gl_query_object *q = NULL; GET_CURRENT_CONTEXT(ctx); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id, - _mesa_enum_to_string(pname)); - - if (id) - q = _mesa_lookup_query_object(ctx, id); - - if (!q || q->Active || !q->EverBound) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetQueryObjectui64vARB(id=%d is invalid or active)", id); - return; - } - - switch (pname) { - case GL_QUERY_RESULT_ARB: - if (!q->Ready) - ctx->Driver.WaitQuery(ctx, q); - *params = q->Result; - break; - case GL_QUERY_RESULT_AVAILABLE_ARB: - if (!q->Ready) - ctx->Driver.CheckQuery( ctx, q ); - *params = q->Ready; - break; - case GL_QUERY_TARGET: - *params = q->Target; - break; - default: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjecti64vARB(pname)"); - return; - } + get_query_object(ctx, "glGetQueryObjecti64v", + id, pname, GL_INT64_ARB, + ctx->QueryBuffer, (intptr_t)params); } @@ -889,40 +881,11 @@ _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params) void GLAPIENTRY _mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params) { - struct gl_query_object *q = NULL; GET_CURRENT_CONTEXT(ctx); - if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id, - _mesa_enum_to_string(pname)); - - if (id) - q = _mesa_lookup_query_object(ctx, id); - - if (!q || q->Active || !q->EverBound) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetQueryObjectuui64vARB(id=%d is invalid or active)", id); - return; - } - - switch (pname) { - case GL_QUERY_RESULT_ARB: - if (!q->Ready) - ctx->Driver.WaitQuery(ctx, 
q); - *params = q->Result; - break; - case GL_QUERY_RESULT_AVAILABLE_ARB: - if (!q->Ready) - ctx->Driver.CheckQuery( ctx, q ); - *params = q->Ready; - break; - case GL_QUERY_TARGET: - *params = q->Target; - break; - default: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectui64vARB(pname)"); - return; - } + get_query_object(ctx, "glGetQueryObjectui64v", + id, pname, GL_UNSIGNED_INT64_ARB, + ctx->QueryBuffer, (intptr_t)params); } /** @@ -932,8 +895,15 @@ void GLAPIENTRY _mesa_GetQueryBufferObjectiv(GLuint id, GLuint buffer, GLenum pname, GLintptr offset) { + struct gl_buffer_object *buf; GET_CURRENT_CONTEXT(ctx); - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectiv"); + + buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectiv"); + if (!buf) + return; + + get_query_object(ctx, "glGetQueryBufferObjectiv", + id, pname, GL_INT, buf, offset); } @@ -941,8 +911,15 @@ void GLAPIENTRY _mesa_GetQueryBufferObjectuiv(GLuint id, GLuint buffer, GLenum pname, GLintptr offset) { + struct gl_buffer_object *buf; GET_CURRENT_CONTEXT(ctx); - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectuiv"); + + buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectuiv"); + if (!buf) + return; + + get_query_object(ctx, "glGetQueryBufferObjectuiv", + id, pname, GL_UNSIGNED_INT, buf, offset); } @@ -950,8 +927,15 @@ void GLAPIENTRY _mesa_GetQueryBufferObjecti64v(GLuint id, GLuint buffer, GLenum pname, GLintptr offset) { + struct gl_buffer_object *buf; GET_CURRENT_CONTEXT(ctx); - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjecti64v"); + + buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjecti64v"); + if (!buf) + return; + + get_query_object(ctx, "glGetQueryBufferObjecti64v", + id, pname, GL_INT64_ARB, buf, offset); } @@ -959,8 +943,15 @@ void GLAPIENTRY _mesa_GetQueryBufferObjectui64v(GLuint id, GLuint buffer, GLenum pname, GLintptr offset) { + struct gl_buffer_object *buf; GET_CURRENT_CONTEXT(ctx); - 
_mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectui64v"); + + buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectui64v"); + if (!buf) + return; + + get_query_object(ctx, "glGetQueryBufferObjectui64v", + id, pname, GL_UNSIGNED_INT64_ARB, buf, offset); } diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index c37b31d1753..b9f7bb65fb6 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -338,7 +338,7 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) struct set_entry *entry; set_foreach(shared->SyncObjects, entry) { - _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key); + _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key, 1); } } _mesa_set_destroy(shared->SyncObjects, NULL); diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 4043c4f2057..57f13411fdf 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -352,7 +352,7 @@ update_multisample(struct gl_context *ctx) ctx->Multisample._Enabled = GL_FALSE; if (ctx->Multisample.Enabled && ctx->DrawBuffer && - ctx->DrawBuffer->Visual.sampleBuffers) + _mesa_geometric_samples(ctx->DrawBuffer) > 0) ctx->Multisample._Enabled = GL_TRUE; } diff --git a/src/mesa/main/syncobj.c b/src/mesa/main/syncobj.c index c1b2d3bed54..be758dd1241 100644 --- a/src/mesa/main/syncobj.c +++ b/src/mesa/main/syncobj.c @@ -167,34 +167,42 @@ _mesa_free_sync_data(struct gl_context *ctx) * - not in sync objects hash table * - type is GL_SYNC_FENCE * - not marked as deleted + * + * Returns the internal gl_sync_object pointer if the sync object is valid + * or NULL if it isn't. + * + * If "incRefCount" is true, the reference count is incremented, which is + * normally what you want; otherwise, a glDeleteSync from another thread + * could delete the sync object while you are still working on it. 
*/ -bool -_mesa_validate_sync(struct gl_context *ctx, - const struct gl_sync_object *syncObj) +struct gl_sync_object * +_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount) { - return (syncObj != NULL) + struct gl_sync_object *syncObj = (struct gl_sync_object *) sync; + mtx_lock(&ctx->Shared->Mutex); + if (syncObj != NULL && _mesa_set_search(ctx->Shared->SyncObjects, syncObj) != NULL && (syncObj->Type == GL_SYNC_FENCE) - && !syncObj->DeletePending; -} - - -void -_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj) -{ - mtx_lock(&ctx->Shared->Mutex); - syncObj->RefCount++; + && !syncObj->DeletePending) { + if (incRefCount) { + syncObj->RefCount++; + } + } else { + syncObj = NULL; + } mtx_unlock(&ctx->Shared->Mutex); + return syncObj; } void -_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj) +_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj, + int amount) { struct set_entry *entry; mtx_lock(&ctx->Shared->Mutex); - syncObj->RefCount--; + syncObj->RefCount -= amount; if (syncObj->RefCount == 0) { entry = _mesa_set_search(ctx->Shared->SyncObjects, syncObj); assert (entry != NULL); @@ -212,10 +220,9 @@ GLboolean GLAPIENTRY _mesa_IsSync(GLsync sync) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync; ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE); - return _mesa_validate_sync(ctx, syncObj) ? GL_TRUE : GL_FALSE; + return _mesa_get_and_ref_sync(ctx, sync, false) ? 
GL_TRUE : GL_FALSE; } @@ -223,7 +230,7 @@ void GLAPIENTRY _mesa_DeleteSync(GLsync sync) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync; + struct gl_sync_object *syncObj; /* From the GL_ARB_sync spec: * @@ -235,16 +242,19 @@ _mesa_DeleteSync(GLsync sync) return; } - if (!_mesa_validate_sync(ctx, syncObj)) { + syncObj = _mesa_get_and_ref_sync(ctx, sync, true); + if (!syncObj) { _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteSync (not a valid sync object)"); return; } /* If there are no client-waits or server-waits pending on this sync, delete - * the underlying object. + * the underlying object. Note that we double-unref the object, as + * _mesa_get_and_ref_sync above took an extra refcount to make sure the pointer + * is valid for us to manipulate. */ syncObj->DeletePending = GL_TRUE; - _mesa_unref_sync_object(ctx, syncObj); + _mesa_unref_sync_object(ctx, syncObj, 2); } @@ -299,21 +309,20 @@ GLenum GLAPIENTRY _mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync; + struct gl_sync_object *syncObj; GLenum ret; ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_WAIT_FAILED); - if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)"); - return GL_WAIT_FAILED; - } - if ((flags & ~GL_SYNC_FLUSH_COMMANDS_BIT) != 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync(flags=0x%x)", flags); return GL_WAIT_FAILED; } - _mesa_ref_sync_object(ctx, syncObj); + syncObj = _mesa_get_and_ref_sync(ctx, sync, true); + if (!syncObj) { + _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)"); + return GL_WAIT_FAILED; + } /* From the GL_ARB_sync spec: * @@ -335,7 +344,7 @@ _mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) } } - _mesa_unref_sync_object(ctx, syncObj); + _mesa_unref_sync_object(ctx, syncObj, 1); return 
ret; } @@ -344,12 +353,7 @@ void GLAPIENTRY _mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync; - - if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)"); - return; - } + struct gl_sync_object *syncObj; if (flags != 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync(flags=0x%x)", flags); @@ -362,7 +366,14 @@ _mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout) return; } + syncObj = _mesa_get_and_ref_sync(ctx, sync, true); + if (!syncObj) { + _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)"); + return; + } + ctx->Driver.ServerWaitSync(ctx, syncObj, flags, timeout); + _mesa_unref_sync_object(ctx, syncObj, 1); } @@ -371,11 +382,12 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values) { GET_CURRENT_CONTEXT(ctx); - struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync; + struct gl_sync_object *syncObj; GLsizei size = 0; GLint v[1]; - if (!_mesa_validate_sync(ctx, syncObj)) { + syncObj = _mesa_get_and_ref_sync(ctx, sync, true); + if (!syncObj) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetSynciv (not a valid sync object)"); return; } @@ -409,6 +421,7 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetSynciv(pname=0x%x)\n", pname); + _mesa_unref_sync_object(ctx, syncObj, 1); return; } @@ -421,4 +434,6 @@ _mesa_GetSynciv(GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, if (length != NULL) { *length = size; } + + _mesa_unref_sync_object(ctx, syncObj, 1); } diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h index 5d510e873a9..ea4a71222c0 100644 --- a/src/mesa/main/syncobj.h +++ b/src/mesa/main/syncobj.h @@ -47,15 +47,12 @@ _mesa_init_sync(struct gl_context *); extern void _mesa_free_sync_data(struct gl_context 
*); -extern void -_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj); +struct gl_sync_object * +_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount); extern void -_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj); - -extern bool -_mesa_validate_sync(struct gl_context *ctx, - const struct gl_sync_object *syncObj); +_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj, + int amount); extern GLboolean GLAPIENTRY _mesa_IsSync(GLsync sync); diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index bb9729cdbde..eb274ad6540 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -145,6 +145,9 @@ _mesa_set_transform_feedback_binding(struct gl_context *ctx, tfObj->BufferNames[index] = bufObj->Name; tfObj->Offset[index] = offset; tfObj->RequestedSize[index] = size; + + if (bufObj != ctx->Shared->NullBufferObj) + bufObj->UsageHistory |= USAGE_TRANSFORM_FEEDBACK_BUFFER; } /*** GL_ARB_direct_state_access ***/ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3c51d18ed62..0f17ed136da 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2293,6 +2293,10 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, (void) row_major; + /* atomics don't get real storage */ + if (type->contains_atomic()) + return; + if (type->is_vector() || type->is_scalar()) { size = type->vector_elements; if (type->is_double()) diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index e98946b9387..34183d4d95f 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -454,73 +454,3 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, *posOut = -1; return GL_FALSE; } - - -struct gl_program_parameter_list * -_mesa_clone_parameter_list(const struct 
gl_program_parameter_list *list) -{ - struct gl_program_parameter_list *clone; - GLuint i; - - clone = _mesa_new_parameter_list(); - if (!clone) - return NULL; - - /** Not too efficient, but correct */ - for (i = 0; i < list->NumParameters; i++) { - struct gl_program_parameter *p = list->Parameters + i; - struct gl_program_parameter *pCopy; - GLuint size = MIN2(p->Size, 4); - GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType, - list->ParameterValues[i], NULL); - assert(j >= 0); - pCopy = clone->Parameters + j; - /* copy state indexes */ - if (p->Type == PROGRAM_STATE_VAR) { - GLint k; - for (k = 0; k < STATE_LENGTH; k++) { - pCopy->StateIndexes[k] = p->StateIndexes[k]; - } - } - else { - clone->Parameters[j].Size = p->Size; - } - - } - - clone->StateFlags = list->StateFlags; - - return clone; -} - - -/** - * Return a new parameter list which is listA + listB. - */ -struct gl_program_parameter_list * -_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA, - const struct gl_program_parameter_list *listB) -{ - struct gl_program_parameter_list *list; - - if (listA) { - list = _mesa_clone_parameter_list(listA); - if (list && listB) { - GLuint i; - for (i = 0; i < listB->NumParameters; i++) { - struct gl_program_parameter *param = listB->Parameters + i; - _mesa_add_parameter(list, param->Type, param->Name, param->Size, - param->DataType, - listB->ParameterValues[i], - param->StateIndexes); - } - } - } - else if (listB) { - list = _mesa_clone_parameter_list(listB); - } - else { - list = NULL; - } - return list; -} diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 44700b710d7..c04d7a2e634 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -99,13 +99,6 @@ _mesa_new_parameter_list_sized(unsigned size); extern void _mesa_free_parameter_list(struct gl_program_parameter_list *paramList); -extern struct gl_program_parameter_list * 
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list); - -extern struct gl_program_parameter_list * -_mesa_combine_parameter_lists(const struct gl_program_parameter_list *a, - const struct gl_program_parameter_list *b); - static inline GLuint _mesa_num_parameters(const struct gl_program_parameter_list *list) { diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 12490d0c380..eed241271df 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -40,6 +40,7 @@ #include "prog_statevars.h" #include "prog_parameter.h" #include "main/samplerobj.h" +#include "framebuffer.h" #define ONE_DIV_SQRT_LN2 (1.201122408786449815) @@ -352,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], } return; case STATE_NUM_SAMPLES: - ((int *)value)[0] = ctx->DrawBuffer->Visual.samples; + ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer); return; case STATE_DEPTH_RANGE: value[0] = ctx->ViewportArray[0].Near; /* near */ diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 0e78e6ab25d..27867c48d52 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -31,6 +31,7 @@ #include "main/glheader.h" #include "main/context.h" +#include "main/framebuffer.h" #include "main/hash.h" #include "main/macros.h" #include "program.h" @@ -534,14 +535,14 @@ _mesa_get_min_invocations_per_fragment(struct gl_context *ctx, * forces per-sample shading" */ if (prog->IsSample && !ignore_sample_qualifier) - return MAX2(ctx->DrawBuffer->Visual.samples, 1); + return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); if (prog->Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID | SYSTEM_BIT_SAMPLE_POS)) - return MAX2(ctx->DrawBuffer->Visual.samples, 1); + return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); else if (ctx->Multisample.SampleShading) return MAX2(ceil(ctx->Multisample.MinSampleShadingValue * - ctx->DrawBuffer->Visual.samples), 1); + 
_mesa_geometric_samples(ctx->DrawBuffer)), 1); else return 1; } diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 03097225bb2..4b89ade1b15 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -75,6 +75,16 @@ static const struct st_tracked_state *atoms[] = &st_bind_tes_ubos, &st_bind_fs_ubos, &st_bind_gs_ubos, + &st_bind_vs_atomics, + &st_bind_tcs_atomics, + &st_bind_tes_atomics, + &st_bind_fs_atomics, + &st_bind_gs_atomics, + &st_bind_vs_ssbos, + &st_bind_tcs_ssbos, + &st_bind_tes_ssbos, + &st_bind_fs_ssbos, + &st_bind_gs_ssbos, &st_update_pixel_transfer, &st_update_tess, diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index a24842baa4f..3a9153c80cb 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -78,6 +78,16 @@ extern const struct st_tracked_state st_bind_vs_ubos; extern const struct st_tracked_state st_bind_gs_ubos; extern const struct st_tracked_state st_bind_tcs_ubos; extern const struct st_tracked_state st_bind_tes_ubos; +extern const struct st_tracked_state st_bind_fs_atomics; +extern const struct st_tracked_state st_bind_vs_atomics; +extern const struct st_tracked_state st_bind_gs_atomics; +extern const struct st_tracked_state st_bind_tcs_atomics; +extern const struct st_tracked_state st_bind_tes_atomics; +extern const struct st_tracked_state st_bind_fs_ssbos; +extern const struct st_tracked_state st_bind_vs_ssbos; +extern const struct st_tracked_state st_bind_gs_ssbos; +extern const struct st_tracked_state st_bind_tcs_ssbos; +extern const struct st_tracked_state st_bind_tes_ssbos; extern const struct st_tracked_state st_update_pixel_transfer; extern const struct st_tracked_state st_update_tess; diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c new file mode 100644 index 00000000000..1c30d1fb701 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c @@ -0,0 
+1,158 @@ +/************************************************************************** + * + * Copyright 2014 Ilia Mirkin. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/imports.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "compiler/glsl/ir_uniform.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "st_debug.h" +#include "st_cb_bufferobjects.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" + +static void +st_bind_atomics(struct st_context *st, + struct gl_shader_program *prog, + unsigned shader_type) +{ + unsigned i; + + if (!prog || !st->pipe->set_shader_buffers) + return; + + for (i = 0; i < prog->NumAtomicBuffers; i++) { + struct gl_active_atomic_buffer *atomic = &prog->AtomicBuffers[i]; + struct gl_atomic_buffer_binding *binding = + &st->ctx->AtomicBufferBindings[atomic->Binding]; + struct st_buffer_object *st_obj = + st_buffer_object(binding->BufferObject); + struct pipe_shader_buffer sb = { 0 }; + + sb.buffer = st_obj->buffer; + sb.buffer_offset = binding->Offset; + sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + + st->pipe->set_shader_buffers(st->pipe, shader_type, + atomic->Binding, 1, &sb); + } +} + +static void +bind_vs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + st_bind_atomics(st, prog, PIPE_SHADER_VERTEX); +} + +const struct st_tracked_state st_bind_vs_atomics = { + "st_bind_vs_atomics", + { + 0, + ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_vs_atomics +}; + +static void +bind_fs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + + st_bind_atomics(st, prog, PIPE_SHADER_FRAGMENT); +} + +const struct st_tracked_state st_bind_fs_atomics = { + "st_bind_fs_atomics", + { + 0, + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_fs_atomics +}; + +static void +bind_gs_atomics(struct 
st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + st_bind_atomics(st, prog, PIPE_SHADER_GEOMETRY); +} + +const struct st_tracked_state st_bind_gs_atomics = { + "st_bind_gs_atomics", + { + 0, + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_gs_atomics +}; + +static void +bind_tcs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + st_bind_atomics(st, prog, PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_atomics = { + "st_bind_tcs_atomics", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_tcs_atomics +}; + +static void +bind_tes_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + st_bind_atomics(st, prog, PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_atomics = { + "st_bind_tes_atomics", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_tes_atomics +}; diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c new file mode 100644 index 00000000000..f165cc3e0a1 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_storagebuf.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2014 Ilia Mirkin. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/imports.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "compiler/glsl/ir_uniform.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "st_debug.h" +#include "st_cb_bufferobjects.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" + +static void +st_bind_ssbos(struct st_context *st, struct gl_shader *shader, + unsigned shader_type) +{ + unsigned i; + struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS]; + struct gl_program_constants *c; + + if (!shader || !st->pipe->set_shader_buffers) + return; + + c = &st->ctx->Const.Program[shader->Stage]; + + for (i = 0; i < shader->NumShaderStorageBlocks; i++) { + struct gl_shader_storage_buffer_binding *binding; + struct st_buffer_object *st_obj; + struct pipe_shader_buffer *sb = &buffers[i]; + + binding = &st->ctx->ShaderStorageBufferBindings[ + shader->ShaderStorageBlocks[i]->Binding]; + st_obj = st_buffer_object(binding->BufferObject); + + sb->buffer = st_obj->buffer; + + if (sb->buffer) { + sb->buffer_offset = binding->Offset; + sb->buffer_size = sb->buffer->width0 - binding->Offset; + + /* AutomaticSize is FALSE if the buffer was set with BindBufferRange. + * Take the minimum just to be sure. 
+ */ + if (!binding->AutomaticSize) + sb->buffer_size = MIN2(sb->buffer_size, (unsigned) binding->Size); + } + else { + sb->buffer_offset = 0; + sb->buffer_size = 0; + } + } + st->pipe->set_shader_buffers(st->pipe, shader_type, c->MaxAtomicBuffers, + shader->NumShaderStorageBlocks, buffers); + /* clear out any stale shader buffers */ + if (shader->NumShaderStorageBlocks < c->MaxShaderStorageBlocks) + st->pipe->set_shader_buffers( + st->pipe, shader_type, + c->MaxAtomicBuffers + shader->NumShaderStorageBlocks, + c->MaxShaderStorageBlocks - shader->NumShaderStorageBlocks, + NULL); +} + +static void bind_vs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_VERTEX], + PIPE_SHADER_VERTEX); +} + +const struct st_tracked_state st_bind_vs_ssbos = { + "st_bind_vs_ssbos", + { + 0, + ST_NEW_VERTEX_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_vs_ssbos +}; + +static void bind_fs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + PIPE_SHADER_FRAGMENT); +} + +const struct st_tracked_state st_bind_fs_ssbos = { + "st_bind_fs_ssbos", + { + 0, + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_fs_ssbos +}; + +static void bind_gs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], + PIPE_SHADER_GEOMETRY); +} + +const struct st_tracked_state st_bind_gs_ssbos = { + "st_bind_gs_ssbos", + { + 0, + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_gs_ssbos +}; + +static void bind_tcs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + 
st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], + PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_ssbos = { + "st_bind_tcs_ssbos", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_tcs_ssbos +}; + +static void bind_tes_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], + PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_ssbos = { + "st_bind_tes_ssbos", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_tes_ssbos +}; diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 68be8ba64ac..202b4eeeefa 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -237,6 +237,13 @@ st_bufferobj_data(struct gl_context *ctx, case GL_PARAMETER_BUFFER_ARB: bind = PIPE_BIND_COMMAND_ARGS_BUFFER; break; + case GL_ATOMIC_COUNTER_BUFFER: + case GL_SHADER_STORAGE_BUFFER: + bind = PIPE_BIND_SHADER_BUFFER; + break; + case GL_QUERY_BUFFER: + bind = PIPE_BIND_QUERY_BUFFER; + break; default: bind = 0; } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index aafae16b2df..fc239bc778c 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -39,9 +39,11 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "util/u_inlines.h" #include "st_context.h" #include "st_cb_queryobj.h" #include "st_cb_bitmap.h" +#include "st_cb_bufferobjects.h" static struct gl_query_object * @@ -271,7 +273,7 @@ st_WaitQuery(struct gl_context *ctx, struct gl_query_object *q) { /* nothing */ } - + q->Ready = GL_TRUE; } @@ -303,6 
+305,98 @@ st_GetTimestamp(struct gl_context *ctx) } } +static void +st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q, + struct gl_buffer_object *buf, intptr_t offset, + GLenum pname, GLenum ptype) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_query_object *stq = st_query_object(q); + struct st_buffer_object *stObj = st_buffer_object(buf); + boolean wait = pname == GL_QUERY_RESULT; + enum pipe_query_value_type result_type; + int index; + + /* GL_QUERY_TARGET is a bit of an extension since it has nothing to + * do with the GPU end of the query. Write it in "by hand". + */ + if (pname == GL_QUERY_TARGET) { + /* Assume that the data must be LE. The endianness situation wrt CPU and + * GPU is incredibly confusing, but the vast majority of GPUs are + * LE. When a BE one comes along, this needs some form of resolution. + */ + unsigned data[2] = { CPU_TO_LE32(q->Target), 0 }; + pipe_buffer_write(pipe, stObj->buffer, offset, + (ptype == GL_INT64_ARB || + ptype == GL_UNSIGNED_INT64_ARB) ? 
8 : 4, + data); + return; + } + + switch (ptype) { + case GL_INT: + result_type = PIPE_QUERY_TYPE_I32; + break; + case GL_UNSIGNED_INT: + result_type = PIPE_QUERY_TYPE_U32; + break; + case GL_INT64_ARB: + result_type = PIPE_QUERY_TYPE_I64; + break; + case GL_UNSIGNED_INT64_ARB: + result_type = PIPE_QUERY_TYPE_U64; + break; + default: + unreachable("Unexpected result type"); + } + + if (pname == GL_QUERY_RESULT_AVAILABLE) { + index = -1; + } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) { + switch (q->Target) { + case GL_VERTICES_SUBMITTED_ARB: + index = 0; + break; + case GL_PRIMITIVES_SUBMITTED_ARB: + index = 1; + break; + case GL_VERTEX_SHADER_INVOCATIONS_ARB: + index = 2; + break; + case GL_GEOMETRY_SHADER_INVOCATIONS: + index = 3; + break; + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + index = 4; + break; + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + index = 5; + break; + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + index = 6; + break; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + index = 7; + break; + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + index = 8; + break; + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + index = 9; + break; + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + index = 10; + break; + default: + unreachable("Unexpected target"); + } + } else { + index = 0; + } + + pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index, + stObj->buffer, offset); +} void st_init_query_functions(struct dd_function_table *functions) { @@ -313,4 +407,5 @@ void st_init_query_functions(struct dd_function_table *functions) functions->WaitQuery = st_WaitQuery; functions->CheckQuery = st_CheckQuery; functions->GetTimestamp = st_GetTimestamp; + functions->StoreQueryResult = st_StoreQueryResult; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 0ceb37027e1..f2b607c3a1d 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -60,6 +60,7 @@ #include 
"pipe/p_context.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_upload_mgr.h" #include "pipe/p_shader_tokens.h" #include "util/u_tile.h" #include "util/u_format.h" @@ -67,6 +68,9 @@ #include "util/u_sampler.h" #include "util/u_math.h" #include "util/u_box.h" +#include "util/u_simple_shaders.h" +#include "cso_cache/cso_context.h" +#include "tgsi/tgsi_ureg.h" #define DBG if (0) printf @@ -686,6 +690,999 @@ st_get_blit_mask(GLenum srcFormat, GLenum dstFormat) } } +void +st_init_pbo_upload(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + + st->pbo_upload.enabled = + screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) && + screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 && + screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS); + if (!st->pbo_upload.enabled) + return; + + st->pbo_upload.rgba_only = + screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY); + + if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) { + if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) { + st->pbo_upload.upload_layers = true; + } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) { + st->pbo_upload.upload_layers = true; + st->pbo_upload.use_gs = true; + } + } + + /* Blend state */ + memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state)); + st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA; + + /* Rasterizer state */ + memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state)); + st->pbo_upload.raster.half_pixel_center = 1; +} + +void +st_destroy_pbo_upload(struct st_context *st) +{ + if (st->pbo_upload.fs) { + cso_delete_fragment_shader(st->cso_context, st->pbo_upload.fs); + st->pbo_upload.fs = NULL; + } + + if (st->pbo_upload.gs) { + cso_delete_geometry_shader(st->cso_context, st->pbo_upload.gs); + st->pbo_upload.gs = NULL; + } + + if (st->pbo_upload.vs) { 
+ cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs); + st->pbo_upload.vs = NULL; + } +} + +/** + * Converts format to a format with the same components, types + * and sizes, but with the components in RGBA order. + */ +static enum pipe_format +unswizzle_format(enum pipe_format format) +{ + switch (format) + { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return PIPE_FORMAT_R10G10B10A2_UNORM; + + case PIPE_FORMAT_B10G10R10A2_SNORM: + return PIPE_FORMAT_R10G10B10A2_SNORM; + + case PIPE_FORMAT_B10G10R10A2_UINT: + return PIPE_FORMAT_R10G10B10A2_UINT; + + default: + return format; + } +} + +/** + * Converts PIPE_FORMAT_A* to PIPE_FORMAT_R*. + */ +static enum pipe_format +alpha_to_red(enum pipe_format format) +{ + switch (format) + { + case PIPE_FORMAT_A8_UNORM: + return PIPE_FORMAT_R8_UNORM; + case PIPE_FORMAT_A8_SNORM: + return PIPE_FORMAT_R8_SNORM; + case PIPE_FORMAT_A8_UINT: + return PIPE_FORMAT_R8_UINT; + case PIPE_FORMAT_A8_SINT: + return PIPE_FORMAT_R8_SINT; + + case PIPE_FORMAT_A16_UNORM: + return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_A16_SNORM: + return PIPE_FORMAT_R16_SNORM; + case PIPE_FORMAT_A16_UINT: + return PIPE_FORMAT_R16_UINT; + case PIPE_FORMAT_A16_SINT: + return PIPE_FORMAT_R16_SINT; + case PIPE_FORMAT_A16_FLOAT: + return PIPE_FORMAT_R16_FLOAT; + + case PIPE_FORMAT_A32_UINT: + return PIPE_FORMAT_R32_UINT; + case PIPE_FORMAT_A32_SINT: + return PIPE_FORMAT_R32_SINT; + case PIPE_FORMAT_A32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + + default: + return format; + } +} + +/** + * Converts PIPE_FORMAT_R*A* to PIPE_FORMAT_R*G*. 
+ */ +static enum pipe_format +red_alpha_to_red_green(enum pipe_format format) +{ + switch (format) + { + case PIPE_FORMAT_R8A8_UNORM: + return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8A8_SNORM: + return PIPE_FORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8A8_UINT: + return PIPE_FORMAT_R8G8_UINT; + case PIPE_FORMAT_R8A8_SINT: + return PIPE_FORMAT_R8G8_SINT; + + case PIPE_FORMAT_R16A16_UNORM: + return PIPE_FORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16A16_SNORM: + return PIPE_FORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16A16_UINT: + return PIPE_FORMAT_R16G16_UINT; + case PIPE_FORMAT_R16A16_SINT: + return PIPE_FORMAT_R16G16_SINT; + case PIPE_FORMAT_R16A16_FLOAT: + return PIPE_FORMAT_R16G16_FLOAT; + + case PIPE_FORMAT_R32A32_UINT: + return PIPE_FORMAT_R32G32_UINT; + case PIPE_FORMAT_R32A32_SINT: + return PIPE_FORMAT_R32G32_SINT; + case PIPE_FORMAT_R32A32_FLOAT: + return PIPE_FORMAT_R32G32_FLOAT; + + default: + return format; + } +} + +/** + * Converts PIPE_FORMAT_L*A* to PIPE_FORMAT_R*G*. + */ +static enum pipe_format +luminance_alpha_to_red_green(enum pipe_format format) +{ + switch (format) + { + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_L8A8_SNORM: + return PIPE_FORMAT_R8G8_SNORM; + case PIPE_FORMAT_L8A8_UINT: + return PIPE_FORMAT_R8G8_UINT; + case PIPE_FORMAT_L8A8_SINT: + return PIPE_FORMAT_R8G8_SINT; + + case PIPE_FORMAT_L16A16_UNORM: + return PIPE_FORMAT_R16G16_UNORM; + case PIPE_FORMAT_L16A16_SNORM: + return PIPE_FORMAT_R16G16_SNORM; + case PIPE_FORMAT_L16A16_UINT: + return PIPE_FORMAT_R16G16_UINT; + case PIPE_FORMAT_L16A16_SINT: + return PIPE_FORMAT_R16G16_SINT; + case PIPE_FORMAT_L16A16_FLOAT: + return PIPE_FORMAT_R16G16_FLOAT; + + case PIPE_FORMAT_L32A32_UINT: + return PIPE_FORMAT_R32G32_UINT; + case PIPE_FORMAT_L32A32_SINT: + return PIPE_FORMAT_R32G32_SINT; + case PIPE_FORMAT_L32A32_FLOAT: + return PIPE_FORMAT_R32G32_FLOAT; + + default: + return format; + } +} + +/** + * Returns true if format is a PIPE_FORMAT_A* format, and 
false otherwise. + */ +static bool +format_is_alpha(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + if (desc->nr_channels == 1 && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X) + return true; + + return false; +} + +/** + * Returns true if format is a PIPE_FORMAT_R* format, and false otherwise. + */ +static bool +format_is_red(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + if (desc->nr_channels == 1 && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1) + return true; + + return false; +} + + +/** + * Returns true if format is a PIPE_FORMAT_L* format, and false otherwise. + */ +static bool +format_is_luminance(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + if (desc->nr_channels == 1 && + desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1) + return true; + + return false; +} + +/** + * Returns true if format is a PIPE_FORMAT_R*A* format, and false otherwise. 
+ */
+static bool
+format_is_red_alpha(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->nr_channels == 2 &&
+       desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y)
+      return true;
+
+   return false;
+}
+
+/**
+ * Returns true if the format stores its components in anything other than
+ * plain RGBA order, and false otherwise.
+ *
+ * A format is treated as "not swizzled" when every component either reads
+ * its own channel (R from X, G from Y, B from Z, A from W) or is a constant
+ * (0 for R/G/B, 1 for A).
+ */
+static bool
+format_is_swizzled_rgba(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   /* desc->swizzle[] is compared against UTIL_FORMAT_SWIZZLE_* by every
+    * other format_is_*() helper in this file; use the same enum here
+    * rather than the TGSI shader-register swizzles.
+    * NOTE(review): this assumes TGSI_SWIZZLE_X..W and
+    * UTIL_FORMAT_SWIZZLE_X..W are numerically identical (0..3), so the
+    * result is unchanged -- confirm against u_format.h. */
+   if ((desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X || desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y || desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z || desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_W || desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1))
+      return false;
+
+   return true;
+}
+
+/**
+ * One entry of a per-family lookup table: a 4-component swizzle and the
+ * pipe format that swizzle_format() returns when asked for that swizzle.
+ */
+struct format_table
+{
+   unsigned char swizzle[4];
+   enum pipe_format format;
+};
+
+/* 8-bit-per-channel UNORM family. */
+static const struct format_table table_8888_unorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+   { { 3, 0, 1, 2 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+   { { 3, 2, 1, 0 }, PIPE_FORMAT_A8B8G8R8_UNORM }
+};
+
+/* 10/10/10/2 UNORM family. */
+static const struct format_table table_1010102_unorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UNORM }
+};
+
+/* 10/10/10/2 SNORM family. */
+static const struct format_table table_1010102_snorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_SNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_SNORM }
+};
+
+/* 10/10/10/2 UINT family. */
+static const struct format_table table_1010102_uint[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UINT },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UINT }
+};
+
+/**
+ * Looks up, within the family that \p format belongs to, the format whose
+ * table entry has exactly the given 4-component \p swizzle.
+ *
+ * Returns PIPE_FORMAT_NONE if \p format is not in one of the tables above
+ * or if no entry of its family matches \p swizzle.
+ */
+static enum pipe_format
+swizzle_format(enum pipe_format format, const int * const swizzle)
+{
+   unsigned i;
+
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: + for (i = 0; i < ARRAY_SIZE(table_8888_unorm); i++) { + if (swizzle[0] == table_8888_unorm[i].swizzle[0] && + swizzle[1] == table_8888_unorm[i].swizzle[1] && + swizzle[2] == table_8888_unorm[i].swizzle[2] && + swizzle[3] == table_8888_unorm[i].swizzle[3]) + return table_8888_unorm[i].format; + } + break; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + for (i = 0; i < ARRAY_SIZE(table_1010102_unorm); i++) { + if (swizzle[0] == table_1010102_unorm[i].swizzle[0] && + swizzle[1] == table_1010102_unorm[i].swizzle[1] && + swizzle[2] == table_1010102_unorm[i].swizzle[2] && + swizzle[3] == table_1010102_unorm[i].swizzle[3]) + return table_1010102_unorm[i].format; + } + break; + + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_B10G10R10A2_SNORM: + for (i = 0; i < ARRAY_SIZE(table_1010102_snorm); i++) { + if (swizzle[0] == table_1010102_snorm[i].swizzle[0] && + swizzle[1] == table_1010102_snorm[i].swizzle[1] && + swizzle[2] == table_1010102_snorm[i].swizzle[2] && + swizzle[3] == table_1010102_snorm[i].swizzle[3]) + return table_1010102_snorm[i].format; + } + break; + + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_B10G10R10A2_UINT: + for (i = 0; i < ARRAY_SIZE(table_1010102_uint); i++) { + if (swizzle[0] == table_1010102_uint[i].swizzle[0] && + swizzle[1] == table_1010102_uint[i].swizzle[1] && + swizzle[2] == table_1010102_uint[i].swizzle[2] && + swizzle[3] == table_1010102_uint[i].swizzle[3]) + return table_1010102_uint[i].format; + } + break; + + default: + break; + } + + return PIPE_FORMAT_NONE; +} + +static bool +reinterpret_formats(enum pipe_format *src_format, enum pipe_format *dst_format) +{ + enum pipe_format src = *src_format; + enum pipe_format dst = *dst_format; + + /* Note: dst_format has already been transformed from luminance/intensity + * to red when this function is called. 
The source format will never
+    * be an intensity format, because GL_INTENSITY is not a legal value
+    * for the format parameter in glTex(Sub)Image(). */
+
+   if (format_is_alpha(src)) {
+      if (!format_is_alpha(dst))
+         return false;
+
+      src = alpha_to_red(src);
+      dst = alpha_to_red(dst);
+   } else if (format_is_luminance(src)) {
+      if (!format_is_red(dst) && !format_is_red_alpha(dst))
+         return false;
+
+      src = util_format_luminance_to_red(src);
+   } else if (util_format_is_luminance_alpha(src)) {
+      src = luminance_alpha_to_red_green(src);
+
+      if (format_is_red_alpha(dst)) {
+         dst = red_alpha_to_red_green(dst);
+      } else if (!format_is_red(dst))
+         return false;
+   } else if (format_is_swizzled_rgba(src)) {
+      const struct util_format_description *src_desc = util_format_description(src);
+      const struct util_format_description *dst_desc = util_format_description(dst);
+      int swizzle[4];
+      unsigned i;
+
+      /* Make sure the format is an RGBA and not an RGBX format */
+      if (src_desc->nr_channels != 4 || src_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+         return false;
+
+      if (dst_desc->nr_channels != 4 || dst_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+         return false;
+
+      /* Compose the destination swizzle with the source swizzle, then pick
+       * the destination-family format with that combined channel order so
+       * the source can be read as plain RGBA (see unswizzle_format). */
+      for (i = 0; i < 4; i++)
+         swizzle[i] = dst_desc->swizzle[src_desc->swizzle[i]];
+
+      dst = swizzle_format(dst, swizzle);
+      if (dst == PIPE_FORMAT_NONE)
+         return false;
+
+      src = unswizzle_format(src);
+   }
+
+   *src_format = src;
+   *dst_format = dst;
+   return true;
+}
+
+/**
+ * Builds the vertex shader used for PBO uploads.
+ *
+ * The shader copies the input position to the output.  When layered uploads
+ * are enabled (st->pbo_upload.upload_layers), the instance ID selects the
+ * layer: it is written straight to a TGSI_SEMANTIC_LAYER output, or, when a
+ * geometry shader is in use (st->pbo_upload.use_gs), converted to float and
+ * stored in out_pos.z for the GS to pick up.
+ *
+ * \return the shader handle from ureg_create_shader_and_destroy(), or NULL
+ *         if the ureg program could not be created.
+ */
+static void *
+create_pbo_upload_vs(struct st_context *st)
+{
+   struct ureg_program *ureg;
+   struct ureg_src in_pos;
+   struct ureg_src in_instanceid;
+   struct ureg_dst out_pos;
+   struct ureg_dst out_layer;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   /* Same failure handling as the GS and FS builders below; the caller
+    * already treats a NULL shader as "cannot upload this way". */
+   if (!ureg)
+      return NULL;
+
+   in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
+
+   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+
+   if (st->pbo_upload.upload_layers) {
+      in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+
+      if (!st->pbo_upload.use_gs)
+         out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+   }
+
+   /* out_pos = in_pos */
+   ureg_MOV(ureg, out_pos, in_pos);
+
+   if (st->pbo_upload.upload_layers) {
+      if (st->pbo_upload.use_gs) {
+         /* out_pos.z = i2f(gl_InstanceID) */
+         ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
+                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
+      } else {
+         /* out_layer = gl_InstanceID */
+         ureg_MOV(ureg, out_layer, in_instanceid);
+      }
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Builds the geometry shader used for layered PBO uploads.
+ *
+ * It takes triangles in and emits a triangle strip of at most 3 vertices.
+ * Each vertex's position is passed through and its layer is recovered from
+ * position.z, where the vertex shader stored the instance ID.
+ *
+ * \return the shader handle, or NULL if the ureg program could not be
+ *         created.
+ */
+static void *
+create_pbo_upload_gs(struct st_context *st)
+{
+   static const int zero = 0;
+   struct ureg_program *ureg;
+   struct ureg_dst out_pos;
+   struct ureg_dst out_layer;
+   struct ureg_src in_pos;
+   struct ureg_src imm;
+   unsigned i;
+
+   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   if (!ureg)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
+
+   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+   in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
+
+   imm = ureg_DECL_immediate_int(ureg, &zero, 1);
+
+   for (i = 0; i < 3; ++i) {
+      struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
+
+      /* out_pos = in_pos[i] */
+      ureg_MOV(ureg, out_pos, in_pos_vertex);
+
+      /* out_layer.x = f2i(in_pos[i].z) */
+      ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
+                     ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
+
+      ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Builds the fragment shader used for PBO uploads.
+ *
+ * For every fragment it turns the window position (plus the layer, for
+ * layered uploads) into a linear texel index using the values in constant
+ * buffer slot 0, and fetches that texel from the buffer texture bound to
+ * sampler 0 with TXF.
+ *
+ * \return the shader handle, or NULL if the ureg program could not be
+ *         created.
+ */
+static void *
+create_pbo_upload_fs(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct ureg_program *ureg;
+   struct ureg_dst out;
+   struct ureg_src sampler;
+   struct ureg_src pos;
+   struct ureg_src layer;
+   struct ureg_src const0;
+   struct ureg_dst temp0;
+
+   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!ureg)
+      return NULL;
+
+   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+   sampler = ureg_DECL_sampler(ureg, 0);
+   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
+      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+   } else {
+      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                               TGSI_INTERPOLATE_LINEAR);
+   }
+   if (st->pbo_upload.upload_layers) {
+      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
+                                       TGSI_INTERPOLATE_CONSTANT);
+   }
+   const0 = ureg_DECL_constant(ureg, 0);
+   temp0 = ureg_DECL_temporary(ureg);
+
+   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride,
+    *                  stride * image_height ]
+    * (the last component is the per-layer size in texels, as filled in by
+    * try_pbo_upload_common). */
+
+   /* temp0.xy = f2i(pos.xy) */
+   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+                  ureg_swizzle(pos,
+                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* temp0.xy = temp0.xy + const0.xy */
+   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+                   ureg_swizzle(ureg_src(temp0),
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+                   ureg_swizzle(const0,
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* temp0.x = const0.z * temp0.y + temp0.x */
+   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
+                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
+                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+
+   if (st->pbo_upload.upload_layers) {
+      /* temp0.x = const0.w * layer + temp0.x */
+      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+                      ureg_scalar(const0, TGSI_SWIZZLE_W),
+                      ureg_scalar(layer, TGSI_SWIZZLE_X),
+                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+   }
+
+   /* out = txf(sampler, temp0.x) */
+   ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
+                  ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
+                  sampler);
+
+   ureg_release_temporary(ureg, temp0);
+
+   ureg_END(ureg);
+
+   return
ureg_create_shader_and_destroy(ureg, pipe); +} + +static bool +try_pbo_upload_common(struct gl_context *ctx, + struct pipe_surface *surface, + int xoffset, int yoffset, + unsigned upload_width, unsigned upload_height, + struct pipe_resource *buffer, + enum pipe_format src_format, + intptr_t buf_offset, + unsigned bytes_per_pixel, + unsigned stride, + unsigned image_height) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1; + unsigned skip_pixels = 0; + bool success = false; + + /* Check alignment. */ + { + unsigned ofs = (buf_offset * bytes_per_pixel) % ctx->Const.TextureBufferOffsetAlignment; + if (ofs != 0) { + if (ofs % bytes_per_pixel != 0) + return false; + + skip_pixels = ofs / bytes_per_pixel; + buf_offset -= skip_pixels; + } + } + + /* Create the shaders */ + if (!st->pbo_upload.vs) { + st->pbo_upload.vs = create_pbo_upload_vs(st); + if (!st->pbo_upload.vs) + return false; + } + + if (depth != 1 && st->pbo_upload.use_gs && !st->pbo_upload.gs) { + st->pbo_upload.gs = create_pbo_upload_gs(st); + if (!st->pbo_upload.gs) + return false; + } + + if (!st->pbo_upload.fs) { + st->pbo_upload.fs = create_pbo_upload_fs(st); + if (!st->pbo_upload.fs) + return false; + } + + /* Set up the sampler_view */ + { + unsigned first_element = buf_offset; + unsigned last_element = buf_offset + skip_pixels + upload_width - 1 + + (upload_height - 1 + (depth - 1) * image_height) * stride; + struct pipe_sampler_view templ; + struct pipe_sampler_view *sampler_view; + + /* This should be ensured by Mesa before calling our callbacks */ + assert((last_element + 1) * bytes_per_pixel <= buffer->width0); + + if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1) + return false; + + memset(&templ, 0, sizeof(templ)); + templ.format = src_format; + templ.u.buf.first_element = first_element; + templ.u.buf.last_element = last_element; + templ.swizzle_r = 
PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + sampler_view = pipe->create_sampler_view(pipe, buffer, &templ); + if (sampler_view == NULL) + return false; + + cso_save_fragment_sampler_views(st->cso_context); + cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1, + &sampler_view); + + pipe_sampler_view_reference(&sampler_view, NULL); + } + + /* Upload vertices */ + { + struct pipe_vertex_buffer vbo; + struct pipe_vertex_element velem; + + float x0 = (float) xoffset / surface->width * 2.0f - 1.0f; + float y0 = (float) yoffset / surface->height * 2.0f - 1.0f; + float x1 = (float) (xoffset + upload_width) / surface->width * 2.0f - 1.0f; + float y1 = (float) (yoffset + upload_height) / surface->height * 2.0f - 1.0f; + + float *verts = NULL; + + vbo.user_buffer = NULL; + vbo.buffer = NULL; + vbo.stride = 2 * sizeof(float); + + u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4, + &vbo.buffer_offset, &vbo.buffer, (void **) &verts); + if (!verts) + goto fail_vertex_upload; + + verts[0] = x0; + verts[1] = y0; + verts[2] = x0; + verts[3] = y1; + verts[4] = x1; + verts[5] = y0; + verts[6] = x1; + verts[7] = y1; + + u_upload_unmap(st->uploader); + + velem.src_offset = 0; + velem.instance_divisor = 0; + velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context); + velem.src_format = PIPE_FORMAT_R32G32_FLOAT; + + cso_save_vertex_elements(st->cso_context); + cso_set_vertex_elements(st->cso_context, 1, &velem); + + cso_save_aux_vertex_buffer_slot(st->cso_context); + cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index, + 1, &vbo); + + pipe_resource_reference(&vbo.buffer, NULL); + } + + /* Upload constants */ + { + struct pipe_constant_buffer cb; + + struct { + int32_t xoffset; + int32_t yoffset; + int32_t stride; + int32_t image_size; + } constants; + + constants.xoffset = -xoffset + skip_pixels; + constants.yoffset = -yoffset; + 
constants.stride = stride; + constants.image_size = stride * image_height; + + if (st->constbuf_uploader) { + cb.buffer = NULL; + cb.user_buffer = NULL; + u_upload_data(st->constbuf_uploader, 0, sizeof(constants), + st->ctx->Const.UniformBufferOffsetAlignment, + &constants, &cb.buffer_offset, &cb.buffer); + if (!cb.buffer) + goto fail_constant_upload; + + u_upload_unmap(st->constbuf_uploader); + } else { + cb.buffer = NULL; + cb.user_buffer = &constants; + cb.buffer_offset = 0; + } + cb.buffer_size = sizeof(constants); + + cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); + cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb); + + pipe_resource_reference(&cb.buffer, NULL); + } + + /* Framebuffer_state */ + { + struct pipe_framebuffer_state fb; + memset(&fb, 0, sizeof(fb)); + fb.width = surface->width; + fb.height = surface->height; + fb.nr_cbufs = 1; + pipe_surface_reference(&fb.cbufs[0], surface); + + cso_save_framebuffer(st->cso_context); + cso_set_framebuffer(st->cso_context, &fb); + + pipe_surface_reference(&fb.cbufs[0], NULL); + } + + /* Viewport state */ + { + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * surface->width; + vp.scale[1] = 0.5f * surface->height; + vp.scale[2] = 1.0f; + vp.translate[0] = 0.5f * surface->width; + vp.translate[1] = 0.5f * surface->height; + vp.translate[2] = 0.0f; + + cso_save_viewport(st->cso_context); + cso_set_viewport(st->cso_context, &vp); + } + + /* Blend state */ + cso_save_blend(st->cso_context); + cso_set_blend(st->cso_context, &st->pbo_upload.blend); + + /* Rasterizer state */ + cso_save_rasterizer(st->cso_context); + cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster); + + /* Set up the shaders */ + cso_save_vertex_shader(st->cso_context); + cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs); + + cso_save_geometry_shader(st->cso_context); + cso_set_geometry_shader_handle(st->cso_context, + depth != 1 ? 
st->pbo_upload.gs : NULL); + + cso_save_tessctrl_shader(st->cso_context); + cso_set_tessctrl_shader_handle(st->cso_context, NULL); + + cso_save_tesseval_shader(st->cso_context); + cso_set_tesseval_shader_handle(st->cso_context, NULL); + + cso_save_fragment_shader(st->cso_context); + cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs); + + /* Disable stream output */ + cso_save_stream_outputs(st->cso_context); + cso_set_stream_outputs(st->cso_context, 0, NULL, 0); + + if (depth == 1) { + cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + } else { + cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, + 0, 4, 0, depth); + } + + success = true; + + cso_restore_framebuffer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_blend(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_vertex_shader(st->cso_context); + cso_restore_geometry_shader(st->cso_context); + cso_restore_tessctrl_shader(st->cso_context); + cso_restore_tesseval_shader(st->cso_context); + cso_restore_fragment_shader(st->cso_context); + cso_restore_stream_outputs(st->cso_context); + cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); +fail_constant_upload: + cso_restore_vertex_elements(st->cso_context); + cso_restore_aux_vertex_buffer_slot(st->cso_context); +fail_vertex_upload: + cso_restore_fragment_sampler_views(st->cso_context); + + return success; +} + +static bool +try_pbo_upload(struct gl_context *ctx, GLuint dims, + struct gl_texture_image *texImage, + GLenum format, GLenum type, + enum pipe_format dst_format, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint width, GLint height, GLint depth, + const void *pixels, + const struct gl_pixelstore_attrib *unpack) +{ + struct st_context *st = st_context(ctx); + struct st_texture_image *stImage = st_texture_image(texImage); + struct st_texture_object *stObj = st_texture_object(texImage->TexObject); + struct pipe_resource *texture = 
stImage->pt; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface = NULL; + enum pipe_format src_format; + const struct util_format_description *desc; + GLenum gl_target = texImage->TexObject->Target; + intptr_t buf_offset; + unsigned bytes_per_pixel; + unsigned stride, image_height; + bool success; + + if (!st->pbo_upload.enabled) + return false; + + /* From now on, we need the gallium representation of dimensions. */ + if (gl_target == GL_TEXTURE_1D_ARRAY) { + depth = height; + height = 1; + zoffset = yoffset; + yoffset = 0; + image_height = 1; + } else { + image_height = unpack->ImageHeight > 0 ? unpack->ImageHeight : height; + } + + if (depth != 1 && !st->pbo_upload.upload_layers) + return false; + + /* Choose the source format. Initially, we do so without checking driver + * support at all because of the remapping we later perform and because + * at least the Radeon driver actually supports some formats for texture + * buffers which it doesn't support for regular textures. 
*/ + src_format = st_choose_matching_format(st, 0, format, type, unpack->SwapBytes); + if (!src_format) { + return false; + } + + src_format = util_format_linear(src_format); + desc = util_format_description(src_format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return false; + + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) + return false; + + if (st->pbo_upload.rgba_only) { + enum pipe_format orig_dst_format = dst_format; + + if (!reinterpret_formats(&src_format, &dst_format)) { + return false; + } + + if (dst_format != orig_dst_format && + !screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D, 0, + PIPE_BIND_RENDER_TARGET)) { + return false; + } + } + + if (!src_format || + !screen->is_format_supported(screen, src_format, PIPE_BUFFER, 0, + PIPE_BIND_SAMPLER_VIEW)) { + return false; + } + + /* Check if the offset satisfies the alignment requirements */ + buf_offset = (intptr_t) pixels; + bytes_per_pixel = desc->block.bits / 8; + + if (buf_offset % bytes_per_pixel) { + return false; + } + + /* Convert to texels */ + buf_offset = buf_offset / bytes_per_pixel; + + /* Compute the stride, taking unpack->Alignment into account */ + { + unsigned pixels_per_row = unpack->RowLength > 0 ? + unpack->RowLength : width; + unsigned bytes_per_row = pixels_per_row * bytes_per_pixel; + unsigned remainder = bytes_per_row % unpack->Alignment; + unsigned offset_rows; + + if (remainder > 0) + bytes_per_row += (unpack->Alignment - remainder); + + if (bytes_per_row % bytes_per_pixel) { + return false; + } + + stride = bytes_per_row / bytes_per_pixel; + + offset_rows = unpack->SkipRows; + if (dims == 3) + offset_rows += image_height * unpack->SkipImages; + + buf_offset += unpack->SkipPixels + stride * offset_rows; + } + + /* Set up the surface */ + { + unsigned level = stObj->pt != stImage->pt ? 
0 : texImage->TexObject->MinLevel + texImage->Level; + unsigned max_layer = util_max_layer(texture, level); + + zoffset += texImage->Face + texImage->TexObject->MinLayer; + + struct pipe_surface templ; + memset(&templ, 0, sizeof(templ)); + templ.format = dst_format; + templ.u.tex.level = level; + templ.u.tex.first_layer = MIN2(zoffset, max_layer); + templ.u.tex.last_layer = MIN2(zoffset + depth - 1, max_layer); + + surface = pipe->create_surface(pipe, texture, &templ); + if (!surface) + return false; + } + + success = try_pbo_upload_common(ctx, surface, + xoffset, yoffset, width, height, + st_buffer_object(unpack->BufferObj)->buffer, + src_format, + buf_offset, + bytes_per_pixel, stride, image_height); + + pipe_surface_reference(&surface, NULL); + + return success; +} static void st_TexSubImage(struct gl_context *ctx, GLuint dims, @@ -735,21 +1732,15 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, goto fallback; } - /* See if the texture format already matches the format and type, - * in which case the memcpy-based fast path will likely be used and - * we don't have to blit. */ - if (_mesa_format_matches_format_and_type(texImage->TexFormat, format, - type, unpack->SwapBytes, NULL)) { - goto fallback; - } + /* See if the destination format is supported. */ if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL) bind = PIPE_BIND_DEPTH_STENCIL; else bind = PIPE_BIND_RENDER_TARGET; - /* See if the destination format is supported. - * For luminance and intensity, only the red channel is stored there. */ + /* For luminance and intensity, only the red channel is stored + * in the destination. 
*/ dst_format = util_format_linear(dst->format); dst_format = util_format_luminance_to_red(dst_format); dst_format = util_format_intensity_to_red(dst_format); @@ -760,6 +1751,21 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, goto fallback; } + if (_mesa_is_bufferobj(unpack->BufferObj)) { + if (try_pbo_upload(ctx, dims, texImage, format, type, dst_format, + xoffset, yoffset, zoffset, + width, height, depth, pixels, unpack)) + return; + } + + /* See if the texture format already matches the format and type, + * in which case the memcpy-based fast path will likely be used and + * we don't have to blit. */ + if (_mesa_format_matches_format_and_type(texImage->TexFormat, format, + type, unpack->SwapBytes, NULL)) { + goto fallback; + } + /* Choose the source format. */ src_format = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW, format, type, unpack->SwapBytes); @@ -849,18 +1855,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, /* 1D array textures. * We need to convert gallium coords to GL coords. 
*/ - GLvoid *src = _mesa_image_address3d(unpack, pixels, + GLvoid *src = _mesa_image_address2d(unpack, pixels, width, depth, format, - type, 0, slice, 0); + type, slice, 0); memcpy(map, src, bytesPerRow); } else { ubyte *slice_map = map; for (row = 0; row < (unsigned) height; row++) { - GLvoid *src = _mesa_image_address3d(unpack, pixels, - width, height, format, - type, slice, row, 0); + GLvoid *src = _mesa_image_address(dims, unpack, pixels, + width, height, format, + type, slice, row, 0); memcpy(slice_map, src, bytesPerRow); slice_map += transfer->stride; } @@ -928,12 +1934,165 @@ st_TexImage(struct gl_context * ctx, GLuint dims, static void +st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims, + struct gl_texture_image *texImage, + GLint x, GLint y, GLint z, + GLsizei w, GLsizei h, GLsizei d, + GLenum format, GLsizei imageSize, const GLvoid *data) +{ + struct st_context *st = st_context(ctx); + struct st_texture_image *stImage = st_texture_image(texImage); + struct st_texture_object *stObj = st_texture_object(texImage->TexObject); + struct pipe_resource *texture = stImage->pt; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_resource *dst = stImage->pt; + struct pipe_surface *surface = NULL; + struct compressed_pixelstore store; + enum pipe_format copy_format; + unsigned bytes_per_block; + unsigned bw, bh; + intptr_t buf_offset; + bool success = false; + + /* Check basic pre-conditions for PBO upload */ + if (!st->prefer_blit_based_texture_transfer) { + goto fallback; + } + + if (!_mesa_is_bufferobj(ctx->Unpack.BufferObj)) + goto fallback; + + if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) || + (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) { + /* ETC isn't supported and is represented by uncompressed formats. 
*/ + goto fallback; + } + + if (!dst) { + goto fallback; + } + + if (!st->pbo_upload.enabled || + !screen->get_param(screen, PIPE_CAP_SURFACE_REINTERPRET_BLOCKS)) { + goto fallback; + } + + /* Choose the pipe format for the upload. */ + bytes_per_block = util_format_get_blocksize(dst->format); + bw = util_format_get_blockwidth(dst->format); + bh = util_format_get_blockheight(dst->format); + + switch (bytes_per_block) { + case 8: + copy_format = PIPE_FORMAT_R16G16B16A16_UINT; + break; + case 16: + copy_format = PIPE_FORMAT_R32G32B32A32_UINT; + break; + default: + goto fallback; + } + + if (!screen->is_format_supported(screen, copy_format, PIPE_BUFFER, 0, + PIPE_BIND_SAMPLER_VIEW)) { + goto fallback; + } + + if (!screen->is_format_supported(screen, copy_format, dst->target, + dst->nr_samples, PIPE_BIND_RENDER_TARGET)) { + goto fallback; + } + + /* Interpret the pixelstore settings. */ + _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, w, h, d, + &ctx->Unpack, &store); + assert(store.CopyBytesPerRow % bytes_per_block == 0); + assert(store.SkipBytes % bytes_per_block == 0); + + /* Compute the offset into the buffer */ + buf_offset = (intptr_t)data + store.SkipBytes; + + if (buf_offset % bytes_per_block) { + goto fallback; + } + + buf_offset = buf_offset / bytes_per_block; + + /* Set up the surface. */ + { + unsigned level = stObj->pt != stImage->pt ? 
0 : texImage->TexObject->MinLevel + texImage->Level; + unsigned max_layer = util_max_layer(texture, level); + + z += texImage->Face + texImage->TexObject->MinLayer; + + struct pipe_surface templ; + memset(&templ, 0, sizeof(templ)); + templ.format = copy_format; + templ.u.tex.level = level; + templ.u.tex.first_layer = MIN2(z, max_layer); + templ.u.tex.last_layer = MIN2(z + d - 1, max_layer); + + surface = pipe->create_surface(pipe, texture, &templ); + if (!surface) + goto fallback; + } + + success = try_pbo_upload_common(ctx, surface, + x / bw, y / bh, + store.CopyBytesPerRow / bytes_per_block, + store.CopyRowsPerSlice, + st_buffer_object(ctx->Unpack.BufferObj)->buffer, + copy_format, + buf_offset, + bytes_per_block, + store.TotalBytesPerRow / bytes_per_block, + store.TotalRowsPerSlice); + + pipe_surface_reference(&surface, NULL); + + if (success) + return; + +fallback: + _mesa_store_compressed_texsubimage(ctx, dims, texImage, + x, y, z, w, h, d, + format, imageSize, data); +} + +static void st_CompressedTexImage(struct gl_context *ctx, GLuint dims, struct gl_texture_image *texImage, GLsizei imageSize, const GLvoid *data) { prep_teximage(ctx, texImage, GL_NONE, GL_NONE); - _mesa_store_compressed_teximage(ctx, dims, texImage, imageSize, data); + + /* only 2D and 3D compressed images are supported at this time */ + if (dims == 1) { + _mesa_problem(ctx, "Unexpected glCompressedTexImage1D call"); + return; + } + + /* This is pretty simple, because unlike the general texstore path we don't + * have to worry about the usual image unpacking or image transfer + * operations. 
+ */ + assert(texImage); + assert(texImage->Width > 0); + assert(texImage->Height > 0); + assert(texImage->Depth > 0); + + /* allocate storage for texture data */ + if (!st_AllocTextureImageBuffer(ctx, texImage)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage%uD", dims); + return; + } + + st_CompressedTexSubImage(ctx, dims, texImage, + 0, 0, 0, + texImage->Width, texImage->Height, texImage->Depth, + texImage->TexFormat, + imageSize, data); } @@ -1958,7 +3117,7 @@ st_init_texture_functions(struct dd_function_table *functions) functions->QuerySamplesForFormat = st_QuerySamplesForFormat; functions->TexImage = st_TexImage; functions->TexSubImage = st_TexSubImage; - functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage; + functions->CompressedTexSubImage = st_CompressedTexSubImage; functions->CopyTexSubImage = st_CopyTexSubImage; functions->GenerateMipmap = st_generate_mipmap; diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h index 1b685298c5f..55c86c401e2 100644 --- a/src/mesa/state_tracker/st_cb_texture.h +++ b/src/mesa/state_tracker/st_cb_texture.h @@ -53,5 +53,10 @@ st_finalize_texture(struct gl_context *ctx, extern void st_init_texture_functions(struct dd_function_table *functions); +extern void +st_init_pbo_upload(struct st_context *st); + +extern void +st_destroy_pbo_upload(struct st_context *st); #endif /* ST_CB_TEXTURE_H */ diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c b/src/mesa/state_tracker/st_cb_texturebarrier.c index dd4dde74c86..2de150ba13a 100644 --- a/src/mesa/state_tracker/st_cb_texturebarrier.c +++ b/src/mesa/state_tracker/st_cb_texturebarrier.c @@ -65,6 +65,13 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield barriers) if (barriers & GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT) flags |= PIPE_BARRIER_MAPPED_BUFFER; + if (barriers & GL_ATOMIC_COUNTER_BARRIER_BIT) + flags |= PIPE_BARRIER_SHADER_BUFFER; + if (barriers & GL_SHADER_STORAGE_BARRIER_BIT) + flags |= 
PIPE_BARRIER_SHADER_BUFFER; + + if (barriers & GL_QUERY_BUFFER_BARRIER_BIT) + flags |= PIPE_BARRIER_QUERY_BUFFER; if (flags && pipe->memory_barrier) pipe->memory_barrier(pipe, flags); diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index ce1e97aacb5..446ebfb563f 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -97,6 +97,30 @@ static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state) /** + * Called via ctx->Driver.QueryMemoryInfo() + */ +static void +st_query_memory_info(struct gl_context *ctx, struct gl_memory_info *out) +{ + struct pipe_screen *screen = st_context(ctx)->pipe->screen; + struct pipe_memory_info info; + + assert(screen->query_memory_info); + if (!screen->query_memory_info) + return; + + screen->query_memory_info(screen, &info); + + out->total_device_memory = info.total_device_memory; + out->avail_device_memory = info.avail_device_memory; + out->total_staging_memory = info.total_staging_memory; + out->avail_staging_memory = info.avail_staging_memory; + out->device_memory_evicted = info.device_memory_evicted; + out->nr_device_memory_evictions = info.nr_device_memory_evictions; +} + + +/** * Called via ctx->Driver.UpdateState() */ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state) @@ -136,6 +160,7 @@ st_destroy_context_priv(struct st_context *st) st_destroy_drawpix(st); st_destroy_drawtex(st); st_destroy_perfmon(st); + st_destroy_pbo_upload(st); for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) { for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) { @@ -209,6 +234,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, st_init_bitmap(st); st_init_clear(st); st_init_draw( st ); + st_init_pbo_upload(st); /* Choose texture target for glDrawPixels, glBitmap, renderbuffers */ if (pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES)) @@ -350,6 +376,8 @@ static void 
st_init_driver_flags(struct gl_driver_flags *f) f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER; f->NewDefaultTessLevels = ST_NEW_TESS_STATE; f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS; + f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER; + f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER; } struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, @@ -487,4 +515,5 @@ void st_init_driver_functions(struct pipe_screen *screen, functions->Enable = st_Enable; functions->UpdateState = st_invalidate_state; + functions->QueryMemoryInfo = st_query_memory_info; } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 9db5f11beb5..57076ad0d18 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -62,6 +62,8 @@ struct u_upload_mgr; #define ST_NEW_TESSCTRL_PROGRAM (1 << 9) #define ST_NEW_TESSEVAL_PROGRAM (1 << 10) #define ST_NEW_SAMPLER_VIEWS (1 << 11) +#define ST_NEW_ATOMIC_BUFFER (1 << 12) +#define ST_NEW_STORAGE_BUFFER (1 << 13) struct st_state_flags { @@ -202,6 +204,19 @@ struct st_context void *gs_layered; } clear; + /* For gl(Compressed)Tex(Sub)Image */ + struct { + struct pipe_rasterizer_state raster; + struct pipe_blend_state blend; + void *vs; + void *gs; + void *fs; + bool enabled; + bool rgba_only; + bool upload_layers; + bool use_gs; + } pbo_upload; + /** used for anything using util_draw_vertex_buffer */ struct pipe_vertex_element velems_util_draw[3]; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 53ea6767395..f25bd742f79 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -218,6 +218,11 @@ void st_init_limits(struct pipe_screen *screen, c->MaxUniformBlockSize / 4 * pc->MaxUniformBlocks); + pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + pc->MaxAtomicBuffers = screen->get_shader_param( + screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2; + pc->MaxShaderStorageBlocks = 
pc->MaxAtomicBuffers; + /* Gallium doesn't really care about local vs. env parameters so use the * same limits. */ @@ -333,6 +338,31 @@ void st_init_limits(struct pipe_screen *screen, screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL); c->GLSLFrontFacingIsSysVal = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); + + c->MaxAtomicBufferBindings = + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + c->MaxCombinedAtomicBuffers = + c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers + + c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers + + c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers + + c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers + + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS); + + if (c->MaxCombinedAtomicBuffers > 0) + extensions->ARB_shader_atomic_counters = GL_TRUE; + + c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers; + c->ShaderStorageBufferOffsetAlignment = + screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT); + if (c->ShaderStorageBufferOffsetAlignment) { + c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings = + c->MaxCombinedAtomicBuffers; + c->MaxCombinedShaderOutputResources += + c->MaxCombinedShaderStorageBlocks; + c->MaxShaderStorageBlockSize = 1 << 27; + extensions->ARB_shader_storage_buffer_object = GL_TRUE; + } } @@ -465,6 +495,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS }, { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE }, + { o(ARB_query_buffer_object), PIPE_CAP_QUERY_BUFFER_OBJECT }, { o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING }, { o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP }, { o(ARB_shader_draw_parameters), PIPE_CAP_DRAW_PARAMETERS }, @@ -496,12 +527,14 @@ void st_init_extensions(struct pipe_screen *screen, { o(EXT_transform_feedback), 
PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS }, { o(AMD_pinned_memory), PIPE_CAP_RESOURCE_FROM_USER_MEMORY }, + { o(ATI_meminfo), PIPE_CAP_QUERY_MEMORY_INFO }, { o(AMD_seamless_cubemap_per_texture), PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE }, { o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL }, { o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP }, { o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER }, { o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART }, { o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER }, + { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO }, /* GL_NV_point_sprite is not supported by gallium because we don't * support the GL_POINT_SPRITE_R_MODE_NV option. */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index cf91d39ff92..b8182de0be8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -52,7 +52,6 @@ #include "st_mesa_to_tgsi.h" -#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) @@ -267,6 +266,9 @@ public: unsigned tex_offset_num_offset; int dead_mask; /**< Used in dead code elimination */ + st_src_reg buffer; /**< buffer register */ + unsigned buffer_access; /**< buffer access type */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ const struct tgsi_opcode_info *info; }; @@ -391,6 +393,7 @@ public: int samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ + int buffers_used; bool indirect_addr_consts; int wpos_transform_const; @@ -444,6 +447,10 @@ public: virtual void visit(ir_barrier *); /*@}*/ + void visit_atomic_counter_intrinsic(ir_call *); + void visit_ssbo_intrinsic(ir_call *); + void visit_membar_intrinsic(ir_call *); + st_src_reg result; /** List of variable_storage */ @@ -557,6 +564,28 @@ 
swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool +is_resource_instruction(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_RESQ: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + return true; + default: + return false; + } +} + static unsigned num_inst_dst_regs(const glsl_to_tgsi_instruction *op) { @@ -566,7 +595,8 @@ num_inst_dst_regs(const glsl_to_tgsi_instruction *op) static unsigned num_inst_src_regs(const glsl_to_tgsi_instruction *op) { - return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src; + return op->info->is_tex || is_resource_instruction(op->op) ? + op->info->num_src - 1 : op->info->num_src; } glsl_to_tgsi_instruction * @@ -661,8 +691,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } } - this->instructions.push_tail(inst); - /* * This section contains the double processing. * GLSL just represents doubles as single channel values, @@ -698,7 +726,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, int initial_src_swz[4], initial_src_idx[4]; int initial_dst_idx[2], initial_dst_writemask[2]; /* select the writemask for dst0 or dst1 */ - unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask; + unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask; /* copy out the writemask, index and swizzles for all src/dsts. */ for (j = 0; j < 2; j++) { @@ -715,10 +743,22 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * scan all the components in the dst writemask * generate an instruction for each of them if required. 
*/ + st_src_reg addr; while (writemask) { int i = u_bit_scan(&writemask); + /* before emitting the instruction, see if we have to adjust store + * address */ + if (i > 1 && inst->op == TGSI_OPCODE_STORE && + addr.file == PROGRAM_UNDEFINED) { + /* We have to advance the buffer address by 16 */ + addr = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr), + inst->src[0], st_src_reg_for_int(16)); + } + + /* first time use previous instruction */ if (dinst == NULL) { dinst = inst; @@ -728,16 +768,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, *dinst = *inst; dinst->next = NULL; dinst->prev = NULL; - this->instructions.push_tail(dinst); } + this->instructions.push_tail(dinst); /* modify the destination if we are splitting */ for (j = 0; j < 2; j++) { if (dst_is_double[j]) { dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; dinst->dst[j].index = initial_dst_idx[j]; - if (i > 1) + if (i > 1) { + if (dinst->op == TGSI_OPCODE_STORE) { + dinst->src[0] = addr; + } else { dinst->dst[j].index++; + } + } } else { /* if we aren't writing to a double, just get the bit of the initial writemask for this channel */ @@ -773,6 +818,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, } } inst = dinst; + } else { + this->instructions.push_tail(inst); } @@ -807,7 +854,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, assert(src1.type != GLSL_TYPE_ARRAY); assert(src1.type != GLSL_TYPE_STRUCT); - if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) + if (is_resource_instruction(op)) + type = src1.type; + else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) type = GLSL_TYPE_DOUBLE; else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) type = GLSL_TYPE_FLOAT; @@ -891,6 +940,9 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case3fid(FLR, FLR, DFLR); case3fid(ROUND, ROUND, DROUND); + case2iu(ATOMIMAX, ATOMUMAX); + 
case2iu(ATOMIMIN, ATOMUMIN); + default: break; } @@ -2170,6 +2222,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); break; + case ir_unop_get_buffer_size: { + ir_constant *const_offset = ir->operands[0]->as_constant(); + st_src_reg buffer( + PROGRAM_BUFFER, + ctx->Const.Program[shader->Stage].MaxAtomicBuffers + + (const_offset ? const_offset->value.u[0] : 0), + GLSL_TYPE_UINT); + if (!const_offset) { + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr)); + emit_arl(ir, sampler_reladdr, op[0]); + } + emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer; + break; + } + case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_pack_snorm_4x8: @@ -2190,10 +2258,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) */ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); break; - - case ir_unop_get_buffer_size: - assert(!"Not implemented yet"); - break; } this->result = result_src; @@ -3071,13 +3135,241 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) } void +glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + ir_dereference *deref = static_cast<ir_dereference *>( + ir->actual_parameters.get_head()); + ir_variable *location = deref->variable_referenced(); + + st_src_reg buffer( + PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); + + /* Calculate the surface offset */ + st_src_reg offset; + ir_dereference_array *deref_array = deref->as_dereference_array(); + + if (deref_array) { + offset = get_temp(glsl_type::uint_type); + + deref_array->array_index->accept(this); + + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), + this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), + offset, st_src_reg_for_int(location->data.offset)); + } else { + offset = 
st_src_reg_for_int(location->data.offset); + } + + ir->return_deref->accept(this); + st_dst_reg dst(this->result); + dst.writemask = WRITEMASK_X; + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_atomic_read", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_atomic_increment", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(1)); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, + st_src_reg_for_int(-1)); + inst->buffer = buffer; + emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); + } +} + +void +glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + ir_constant *const_block = block->as_constant(); + + st_src_reg buffer( + PROGRAM_BUFFER, + ctx->Const.Program[shader->Stage].MaxAtomicBuffers + + (const_block ? 
const_block->value.u[0] : 0), + GLSL_TYPE_UINT); + + if (!const_block) { + block->accept(this); + emit_arl(ir, sampler_reladdr, this->result); + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr)); + } + + /* Calculate the surface offset */ + offset->accept(this); + st_src_reg off = this->result; + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_load_ssbo", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); + if (dst.type == GLSL_TYPE_BOOL) + emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); + } else if (!strcmp("__intrinsic_store_ssbo", callee)) { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + dst.writemask = write_mask->value.u[0]; + + dst.type = this->result.type; + inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if 
(!strcmp("__intrinsic_atomic_exchange_ssbo", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, off, data, data2); + } + + param = param->get_next(); + ir_constant *access = NULL; + if (!param->is_tail_sentinel()) { + access = ((ir_instruction *)param)->as_constant(); + assert(access); + } + + /* The emit_asm() might have actually split the op into pieces, e.g. for + * double stores. We have to go back and fix up all the generated ops. + */ + unsigned op = inst->op; + do { + inst->buffer = buffer; + if (access) + inst->buffer_access = access->value.u[0]; + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + if (inst->op == TGSI_OPCODE_UADD) + inst = (glsl_to_tgsi_instruction *)inst->get_prev(); + } while (inst && inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op); +} + +void +glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + + if (!strcmp("__intrinsic_memory_barrier", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | + TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE | + TGSI_MEMBAR_SHARED)); + else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); + else if (!strcmp("__intrinsic_memory_barrier_buffer", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); + else if (!strcmp("__intrinsic_memory_barrier_image", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); + else if (!strcmp("__intrinsic_memory_barrier_shared", 
callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHARED)); + else if (!strcmp("__intrinsic_group_memory_barrier", callee)) + emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, + st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | + TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE | + TGSI_MEMBAR_SHARED | + TGSI_MEMBAR_THREAD_GROUP)); + else + assert(!"Unexpected memory barrier intrinsic"); +} + +void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; ir_function_signature *sig = ir->callee; - function_entry *entry = get_function_signature(sig); + const char *callee = sig->function_name(); + function_entry *entry; int i; + /* Filter out intrinsics */ + if (!strcmp("__intrinsic_atomic_read", callee) || + !strcmp("__intrinsic_atomic_increment", callee) || + !strcmp("__intrinsic_atomic_predecrement", callee)) { + visit_atomic_counter_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_load_ssbo", callee) || + !strcmp("__intrinsic_store_ssbo", callee) || + !strcmp("__intrinsic_atomic_add_ssbo", callee) || + !strcmp("__intrinsic_atomic_min_ssbo", callee) || + !strcmp("__intrinsic_atomic_max_ssbo", callee) || + !strcmp("__intrinsic_atomic_and_ssbo", callee) || + !strcmp("__intrinsic_atomic_or_ssbo", callee) || + !strcmp("__intrinsic_atomic_xor_ssbo", callee) || + !strcmp("__intrinsic_atomic_exchange_ssbo", callee) || + !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) { + visit_ssbo_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_memory_barrier", callee) || + !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) || + !strcmp("__intrinsic_memory_barrier_buffer", callee) || + !strcmp("__intrinsic_memory_barrier_image", callee) || + !strcmp("__intrinsic_memory_barrier_shared", callee) || + !strcmp("__intrinsic_group_memory_barrier", callee)) { + visit_membar_intrinsic(ir); + return; + } + + entry = get_function_signature(sig); /* Process in parameters. 
*/ foreach_two_lists(formal_node, &sig->parameters, actual_node, &ir->actual_parameters) { @@ -3583,6 +3875,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() current_function = NULL; num_address_regs = 0; samplers_used = 0; + buffers_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; glsl_version = 0; @@ -3617,6 +3910,7 @@ static void count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; + v->buffers_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (inst->info->is_tex) { @@ -3634,6 +3928,12 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) } } } + if (inst->buffer.file != PROGRAM_UNDEFINED && ( + is_resource_instruction(inst->op) || + inst->op == TGSI_OPCODE_STORE)) { + if (inst->buffer.file == PROGRAM_BUFFER) + v->buffers_used |= 1 << inst->buffer.index; + } } prog->SamplersUsed = v->samplers_used; @@ -3822,9 +4122,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int * last_reads[inst->src[j].index] = (depth == 0) ? i : -2; } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) + if (inst->dst[j].file == PROGRAM_TEMPORARY) { if (first_writes[inst->dst[j].index] == -1) first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + last_reads[inst->dst[j].index] = (depth == 0) ? 
i : -2; + } } for (j = 0; j < inst->tex_offset_num_offset; j++) { if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) @@ -4229,7 +4531,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { if (!inst->dead_mask || !inst->dst[0].writemask) continue; - else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { + /* No amount of dead masks should remove memory stores */ + if (inst->info->is_store) + continue; + + if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { inst->remove(); delete inst; removed++; @@ -4338,6 +4644,7 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused * register index as such. */ + assert(last_reads[j] >= last_reads[i]); last_reads[i] = last_reads[j]; first_writes[j] = -1; last_reads[j] = -1; @@ -4407,6 +4714,7 @@ struct st_translate { struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; @@ -4814,13 +5122,13 @@ compile_tgsi_instruction(struct st_translate *t, const glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; - GLuint i; + int i; struct ureg_dst dst[2]; struct ureg_src src[4]; struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; - unsigned num_dst; - unsigned num_src; + int num_dst; + int num_src; unsigned tex_target; num_dst = num_inst_dst_regs(inst); @@ -4868,7 +5176,7 @@ compile_tgsi_instruction(struct st_translate *t, src[num_src] = ureg_src_indirect(src[num_src], ureg_src(t->address[2])); num_src++; - for (i = 0; i < inst->tex_offset_num_offset; i++) { + for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { texoffsets[i] = translate_tex_offset(t, 
&inst->tex_offsets[i], i); } tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -4881,6 +5189,38 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src); return; + case TGSI_OPCODE_RESQ: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + for (i = num_src - 1; i >= 0; i--) + src[i + 1] = src[i]; + num_src++; + src[0] = t->buffers[inst->buffer.index]; + if (inst->buffer.reladdr) + src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); + assert(src[0].File != TGSI_FILE_NULL); + ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, + inst->buffer_access); + break; + + case TGSI_OPCODE_STORE: + dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask); + if (inst->buffer.reladdr) + dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2])); + assert(dst[0].File != TGSI_FILE_NULL); + ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, + inst->buffer_access); + break; + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); @@ -5170,6 +5510,8 @@ st_translate_program( { struct st_translate *t; unsigned i; + struct gl_program_constants *frag_const = + &ctx->Const.Program[MESA_SHADER_FRAGMENT]; enum pipe_error ret = PIPE_OK; assert(numInputs <= ARRAY_SIZE(t->inputs)); @@ -5485,7 +5827,7 @@ st_translate_program( assert(i == program->num_immediates); /* texture samplers */ - for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { + for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { unsigned type; @@ -5510,6 +5852,21 @@ st_translate_program( } } + for (i = 0; i < 
frag_const->MaxAtomicBuffers; i++) { + if (program->buffers_used & (1 << i)) { + t->buffers[i] = ureg_DECL_buffer(ureg, i, true); + } + } + + for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks; + i++) { + if (program->buffers_used & (1 << i)) { + t->buffers[i] = ureg_DECL_buffer(ureg, i, false); + } + } + + + /* Emit each instruction in turn: */ foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) { diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 0b8b6a9de56..6494aa518a2 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -181,6 +181,9 @@ vbo_sizeof_ib_type(GLenum type) } void +vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj); + +void vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim, const struct _mesa_index_buffer *ib, GLuint *min_index, GLuint *max_index, GLuint nr_prims); diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 02139ef881f..f0245fd08cc 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -37,8 +37,6 @@ #include "main/enums.h" #include "main/macros.h" #include "main/transformfeedback.h" -#include "main/sse_minmax.h" -#include "x86/common_x86_asm.h" #include "vbo_context.h" @@ -80,152 +78,6 @@ vbo_check_buffers_are_unmapped(struct gl_context *ctx) } - -/** - * Compute min and max elements by scanning the index buffer for - * glDraw[Range]Elements() calls. - * If primitive restart is enabled, we need to ignore restart - * indexes when computing min/max. 
- */ -static void -vbo_get_minmax_index(struct gl_context *ctx, - const struct _mesa_prim *prim, - const struct _mesa_index_buffer *ib, - GLuint *min_index, GLuint *max_index, - const GLuint count) -{ - const GLboolean restart = ctx->Array._PrimitiveRestart; - const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type); - const int index_size = vbo_sizeof_ib_type(ib->type); - const char *indices; - GLuint i; - - indices = (char *) ib->ptr + prim->start * index_size; - if (_mesa_is_bufferobj(ib->obj)) { - GLsizeiptr size = MIN2(count * index_size, ib->obj->Size); - indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size, - GL_MAP_READ_BIT, ib->obj, - MAP_INTERNAL); - } - - switch (ib->type) { - case GL_UNSIGNED_INT: { - const GLuint *ui_indices = (const GLuint *)indices; - GLuint max_ui = 0; - GLuint min_ui = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (ui_indices[i] != restartIndex) { - if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; - if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; - } - } - } - else { -#if defined(USE_SSE41) - if (cpu_has_sse4_1) { - _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count); - } - else -#endif - for (i = 0; i < count; i++) { - if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; - if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; - } - } - *min_index = min_ui; - *max_index = max_ui; - break; - } - case GL_UNSIGNED_SHORT: { - const GLushort *us_indices = (const GLushort *)indices; - GLuint max_us = 0; - GLuint min_us = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (us_indices[i] != restartIndex) { - if (us_indices[i] > max_us) max_us = us_indices[i]; - if (us_indices[i] < min_us) min_us = us_indices[i]; - } - } - } - else { - for (i = 0; i < count; i++) { - if (us_indices[i] > max_us) max_us = us_indices[i]; - if (us_indices[i] < min_us) min_us = us_indices[i]; - } - } - *min_index = min_us; - *max_index = max_us; - break; - } - case GL_UNSIGNED_BYTE: { - const 
GLubyte *ub_indices = (const GLubyte *)indices; - GLuint max_ub = 0; - GLuint min_ub = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (ub_indices[i] != restartIndex) { - if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; - if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; - } - } - } - else { - for (i = 0; i < count; i++) { - if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; - if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; - } - } - *min_index = min_ub; - *max_index = max_ub; - break; - } - default: - unreachable("not reached"); - } - - if (_mesa_is_bufferobj(ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); - } -} - -/** - * Compute min and max elements for nr_prims - */ -void -vbo_get_minmax_indices(struct gl_context *ctx, - const struct _mesa_prim *prims, - const struct _mesa_index_buffer *ib, - GLuint *min_index, - GLuint *max_index, - GLuint nr_prims) -{ - GLuint tmp_min, tmp_max; - GLuint i; - GLuint count; - - *min_index = ~0; - *max_index = 0; - - for (i = 0; i < nr_prims; i++) { - const struct _mesa_prim *start_prim; - - start_prim = &prims[i]; - count = start_prim->count; - /* Do combination if possible to reduce map/unmap count */ - while ((i + 1 < nr_prims) && - (prims[i].start + prims[i].count == prims[i+1].start)) { - count += prims[i+1].count; - i++; - } - vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count); - *min_index = MIN2(*min_index, tmp_min); - *max_index = MAX2(*max_index, tmp_max); - } -} - - /** * Check that element 'j' of the array has reasonable data. * Map VBO if needed. diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c new file mode 100644 index 00000000000..0f75a87f3f3 --- /dev/null +++ b/src/mesa/vbo/vbo_minmax_index.c @@ -0,0 +1,378 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2003 VMware, Inc. + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * Copyright (C) 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/varray.h" +#include "main/macros.h" +#include "main/sse_minmax.h" +#include "x86/common_x86_asm.h" +#include "util/hash_table.h" + + +struct minmax_cache_key { + GLintptr offset; + GLuint count; + GLenum type; +}; + + +struct minmax_cache_entry { + struct minmax_cache_key key; + GLuint min; + GLuint max; +}; + + +static uint32_t +vbo_minmax_cache_hash(const struct minmax_cache_key *key) +{ + return _mesa_hash_data(key, sizeof(*key)); +} + + +static bool +vbo_minmax_cache_key_equal(const struct minmax_cache_key *a, + const struct minmax_cache_key *b) +{ + return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type); +} + + +static void +vbo_minmax_cache_delete_entry(struct hash_entry *entry) +{ + free(entry->data); +} + + +static GLboolean +vbo_use_minmax_cache(struct gl_buffer_object *bufferObj) +{ + if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER | + USAGE_ATOMIC_COUNTER_BUFFER | + USAGE_SHADER_STORAGE_BUFFER | + USAGE_TRANSFORM_FEEDBACK_BUFFER | + USAGE_PIXEL_PACK_BUFFER | + USAGE_DISABLE_MINMAX_CACHE)) + return GL_FALSE; + + if ((bufferObj->Mappings[MAP_USER].AccessFlags & + (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) == + (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) + return GL_FALSE; + + return GL_TRUE; +} + + +void +vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj) +{ + _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); + bufferObj->MinMaxCache = NULL; +} + + +static GLboolean +vbo_get_minmax_cached(struct gl_buffer_object *bufferObj, + GLenum type, GLintptr offset, GLuint count, + GLuint *min_index, GLuint *max_index) +{ + GLboolean found = GL_FALSE; + struct minmax_cache_key key; + uint32_t hash; + struct hash_entry *result; + + if (!bufferObj->MinMaxCache) + return GL_FALSE; + if (!vbo_use_minmax_cache(bufferObj)) + return GL_FALSE; + + mtx_lock(&bufferObj->Mutex); + + if (bufferObj->MinMaxCacheDirty) { + /* Disable 
the cache permanently for this BO if the number of hits + * is asymptotically less than the number of misses. This happens when + * applications use the BO for streaming. + * + * However, some initial optimism allows applications that interleave + * draw calls with glBufferSubData during warmup. + */ + unsigned optimism = bufferObj->Size; + if (bufferObj->MinMaxCacheMissIndices > optimism && + bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) { + bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE; + vbo_delete_minmax_cache(bufferObj); + goto out_disable; + } + + _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); + bufferObj->MinMaxCacheDirty = false; + goto out_invalidate; + } + + key.type = type; + key.offset = offset; + key.count = count; + hash = vbo_minmax_cache_hash(&key); + result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key); + if (result) { + struct minmax_cache_entry *entry = result->data; + *min_index = entry->min; + *max_index = entry->max; + found = GL_TRUE; + } + +out_invalidate: + if (found) { + /* The hit counter saturates so that we don't accidentally disable the + * cache in a long-running program. 
+ */ + unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count; + + if (new_hit_count >= bufferObj->MinMaxCacheHitIndices) + bufferObj->MinMaxCacheHitIndices = new_hit_count; + else + bufferObj->MinMaxCacheHitIndices = ~(unsigned)0; + } else { + bufferObj->MinMaxCacheMissIndices += count; + } + +out_disable: + mtx_unlock(&bufferObj->Mutex); + return found; +} + + +static void +vbo_minmax_cache_store(struct gl_context *ctx, + struct gl_buffer_object *bufferObj, + GLenum type, GLintptr offset, GLuint count, + GLuint min, GLuint max) +{ + struct minmax_cache_entry *entry; + struct hash_entry *table_entry; + uint32_t hash; + + if (!vbo_use_minmax_cache(bufferObj)) + return; + + mtx_lock(&bufferObj->Mutex); + + if (!bufferObj->MinMaxCache) { + bufferObj->MinMaxCache = + _mesa_hash_table_create(NULL, + (uint32_t (*)(const void *))vbo_minmax_cache_hash, + (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal); + if (!bufferObj->MinMaxCache) + goto out; + } + + entry = MALLOC_STRUCT(minmax_cache_entry); + if (!entry) + goto out; + + entry->key.offset = offset; + entry->key.count = count; + entry->key.type = type; + entry->min = min; + entry->max = max; + hash = vbo_minmax_cache_hash(&entry->key); + + table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, + hash, &entry->key); + if (table_entry) { + /* It seems like this could happen when two contexts are rendering using + * the same buffer object from multiple threads. + */ + _mesa_debug(ctx, "duplicate entry in minmax cache\n"); + free(entry); + goto out; + } + + table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache, + hash, &entry->key, entry); + if (!table_entry) + free(entry); + +out: + mtx_unlock(&bufferObj->Mutex); +} + + +/** + * Compute min and max elements by scanning the index buffer for + * glDraw[Range]Elements() calls. + * If primitive restart is enabled, we need to ignore restart + * indexes when computing min/max. 
+ */ +static void +vbo_get_minmax_index(struct gl_context *ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint *min_index, GLuint *max_index, + const GLuint count) +{ + const GLboolean restart = ctx->Array._PrimitiveRestart; + const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type); + const int index_size = vbo_sizeof_ib_type(ib->type); + const char *indices; + GLuint i; + + indices = (char *) ib->ptr + prim->start * index_size; + if (_mesa_is_bufferobj(ib->obj)) { + GLsizeiptr size = MIN2(count * index_size, ib->obj->Size); + + if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count, + min_index, max_index)) + return; + + indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size, + GL_MAP_READ_BIT, ib->obj, + MAP_INTERNAL); + } + + switch (ib->type) { + case GL_UNSIGNED_INT: { + const GLuint *ui_indices = (const GLuint *)indices; + GLuint max_ui = 0; + GLuint min_ui = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ui_indices[i] != restartIndex) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + } + else { +#if defined(USE_SSE41) + if (cpu_has_sse4_1) { + _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count); + } + else +#endif + for (i = 0; i < count; i++) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + *min_index = min_ui; + *max_index = max_ui; + break; + } + case GL_UNSIGNED_SHORT: { + const GLushort *us_indices = (const GLushort *)indices; + GLuint max_us = 0; + GLuint min_us = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (us_indices[i] != restartIndex) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = us_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = 
us_indices[i]; + } + } + *min_index = min_us; + *max_index = max_us; + break; + } + case GL_UNSIGNED_BYTE: { + const GLubyte *ub_indices = (const GLubyte *)indices; + GLuint max_ub = 0; + GLuint min_ub = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ub_indices[i] != restartIndex) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + *min_index = min_ub; + *max_index = max_ub; + break; + } + default: + unreachable("not reached"); + } + /* Key the cache entry by byte offset, exactly as the lookup above does. */ + if (_mesa_is_bufferobj(ib->obj)) { + vbo_minmax_cache_store(ctx, ib->obj, ib->type, + (GLintptr) ib->ptr + prim->start * index_size, count, *min_index, *max_index); + ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); + } +} + +/** + * Compute min and max elements for nr_prims + */ +void +vbo_get_minmax_indices(struct gl_context *ctx, + const struct _mesa_prim *prims, + const struct _mesa_index_buffer *ib, + GLuint *min_index, + GLuint *max_index, + GLuint nr_prims) +{ + GLuint tmp_min, tmp_max; + GLuint i; + GLuint count; + + *min_index = ~0; + *max_index = 0; + + for (i = 0; i < nr_prims; i++) { + const struct _mesa_prim *start_prim; + + start_prim = &prims[i]; + count = start_prim->count; + /* Do combination if possible to reduce map/unmap count */ + while ((i + 1 < nr_prims) && + (prims[i].start + prims[i].count == prims[i+1].start)) { + count += prims[i+1].count; + i++; + } + vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count); + *min_index = MIN2(*min_index, tmp_min); + *max_index = MAX2(*max_index, tmp_max); + } +} diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index c185f62099e..b0aca19c8b0 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -69,7 +69,7 @@ 
_mesa_x86_64_transform_points4_general: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq 
V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */ movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */ @@ -80,7 +80,7 @@ _mesa_x86_64_transform_points4_general: p4_general_loop: movups (%rdx), %xmm8 /* ox | oy | oz | ow */ - prefetchw 16(%rdi) + prefetcht1 16(%rdi) pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */ addq %rax, %rdx @@ -93,7 +93,7 @@ p4_general_loop: addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */ mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ @@ -150,7 +150,7 @@ _mesa_x86_64_transform_points4_3d: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */ movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */ @@ -166,7 +166,7 @@ _mesa_x86_64_transform_points4_3d: p4_3d_loop: movups (%rdx), %xmm8 /* ox | oy | oz | ow */ - prefetchw 16(%rdi) + prefetcht1 16(%rdi) pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */ addq %rax, %rdx @@ -179,7 +179,7 @@ p4_3d_loop: addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */ mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */ - prefetch 16(%rdx) + prefetcht1 16(%rdx) addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... 
*/ movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ @@ -210,8 +210,8 @@ _mesa_x86_64_transform_points4_identity: movq V4F_START(%rdx), %rsi /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch 64(%rsi) - prefetchw 64(%rdi) + prefetcht1 64(%rsi) + prefetcht1 64(%rdi) add %ecx, %ecx @@ -242,7 +242,7 @@ _mesa_3dnow_transform_points4_3d_no_rot: movq V4F_START(%rdx), %rdx /* ptr to first src vertex */ movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ - prefetch (%rdx) + prefetcht1 (%rdx) movd (%rsi), %mm0 /* | m00 */ .byte 0x66, 0x66, 0x90 /* manual align += 3 */ @@ -255,7 +255,7 @@ _mesa_3dnow_transform_points4_3d_no_rot: p4_3d_no_rot_loop: - prefetchw 32(%rdi) + prefetcht1 32(%rdi) movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -279,7 +279,7 @@ p4_3d_no_rot_loop: addq $16, %rdi decl %ecx - prefetch 32(%rdx) + prefetcht1 32(%rdx) jnz p4_3d_no_rot_loop p4_3d_no_rot_done: @@ -311,7 +311,7 @@ _mesa_3dnow_transform_points4_perspective: punpckldq 20(%rsi), %mm0 /* m11 | m00 */ movq 32(%rsi), %mm2 /* m21 | m20 */ - prefetch (%rdx) + prefetcht1 (%rdx) movd 40(%rsi), %mm1 /* | m22 */ @@ -321,7 +321,7 @@ _mesa_3dnow_transform_points4_perspective: p4_perspective_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -347,7 +347,7 @@ p4_perspective_loop: addq $16, %rdi decl %ecx - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ jnz p4_perspective_loop p4_perspective_done: @@ -374,14 +374,14 @@ _mesa_3dnow_transform_points4_2d_no_rot: movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */ movd (%rsi), %mm0 /* | m00 */ - prefetch (%rdx) + prefetcht1 (%rdx) punpckldq 20(%rsi), %mm0 /* m11 | m00 */ movq 48(%rsi), %mm1 /* m31 | m30 */ p4_2d_no_rot_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 
32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm4 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -394,7 +394,7 @@ p4_2d_no_rot_loop: addq %rax, %rdx pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */ - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */ movq %mm6, (%rdi) /* write r0, r1 */ @@ -433,7 +433,7 @@ _mesa_3dnow_transform_points4_2d: movd (%rsi), %mm0 /* | m00 */ movd 4(%rsi), %mm1 /* | m01 */ - prefetch (%rdx) + prefetcht1 (%rdx) punpckldq 16(%rsi), %mm0 /* m10 | m00 */ .byte 0x66, 0x66, 0x90 /* manual align += 4 */ @@ -443,7 +443,7 @@ _mesa_3dnow_transform_points4_2d: p4_2d_loop: - prefetchw 32(%rdi) /* prefetch 2 vertices ahead */ + prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */ movq (%rdx), %mm3 /* x1 | x0 */ movq 8(%rdx), %mm5 /* x3 | x2 */ @@ -460,7 +460,7 @@ p4_2d_loop: pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */ pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */ - prefetch 32(%rdx) /* hopefully stride is zero */ + prefetcht1 32(%rdx) /* hopefully stride is zero */ pfadd %mm6, %mm3 /* r1 | r0 */ diff --git a/src/util/hash_table.c b/src/util/hash_table.c index 3247593c1f6..4cfe3d93251 100644 --- a/src/util/hash_table.c +++ b/src/util/hash_table.c @@ -163,6 +163,32 @@ _mesa_hash_table_destroy(struct hash_table *ht, ralloc_free(ht); } +/** + * Deletes all entries of the given hash table without deleting the table + * itself or changing its structure. + * + * If delete_function is passed, it gets called on each entry present. 
+ */ +void +_mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)) +{ + struct hash_entry *entry; + + for (entry = ht->table; entry != ht->table + ht->size; entry++) { + if (entry->key == NULL) + continue; + + if (delete_function != NULL && entry->key != ht->deleted_key) + delete_function(entry); + + entry->key = NULL; + } + + ht->entries = 0; + ht->deleted_entries = 0; +} + /** Sets the value of the key pointer used for deleted entries in the table. * * The assumption is that usually keys are actual pointers, so we use a @@ -300,7 +326,8 @@ hash_table_insert(struct hash_table *ht, uint32_t hash, * required to avoid memory leaks, perform a search * before inserting. */ - if (entry->hash == hash && + if (!entry_is_deleted(ht, entry) && + entry->hash == hash && ht->key_equals_function(key, entry->key)) { entry->key = key; entry->data = data; diff --git a/src/util/hash_table.h b/src/util/hash_table.h index eb9dbc333ec..85b013cac24 100644 --- a/src/util/hash_table.h +++ b/src/util/hash_table.h @@ -64,9 +64,16 @@ _mesa_hash_table_create(void *mem_ctx, const void *b)); void _mesa_hash_table_destroy(struct hash_table *ht, void (*delete_function)(struct hash_entry *entry)); +void _mesa_hash_table_clear(struct hash_table *ht, + void (*delete_function)(struct hash_entry *entry)); void _mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key); +static inline uint32_t _mesa_hash_table_num_entries(struct hash_table *ht) +{ + return ht->entries; +} + struct hash_entry * _mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data); struct hash_entry * diff --git a/src/util/set.c b/src/util/set.c index f01f8699ac2..99abefd0632 100644 --- a/src/util/set.c +++ b/src/util/set.c @@ -282,7 +282,8 @@ set_add(struct set *ht, uint32_t hash, const void *key) * If freeing of old keys is required to avoid memory leaks, * perform a search before inserting. 
*/ - if (entry->hash == hash && + if (!entry_is_deleted(entry) && + entry->hash == hash && ht->key_equals_function(key, entry->key)) { entry->key = key; return entry; diff --git a/src/util/tests/hash_table/Makefile.am b/src/util/tests/hash_table/Makefile.am index 04a77e30df1..8f12240cede 100644 --- a/src/util/tests/hash_table/Makefile.am +++ b/src/util/tests/hash_table/Makefile.am @@ -29,6 +29,7 @@ LDADD = \ $(DLOPEN_LIBS) TESTS = \ + clear \ collision \ delete_and_lookup \ delete_management \ diff --git a/src/util/tests/hash_table/clear.c b/src/util/tests/hash_table/clear.c new file mode 100644 index 00000000000..526700bfb0f --- /dev/null +++ b/src/util/tests/hash_table/clear.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "hash_table.h" + +static void *make_key(uint32_t i) +{ + return (void *)(uintptr_t)(1 + i); +} + +static uint32_t key_id(const void *key) +{ + return (uintptr_t)key - 1; +} + +static uint32_t key_hash(const void *key) +{ + return (uintptr_t)key; +} + +static bool key_equal(const void *a, const void *b) +{ + return a == b; +} + +static void delete_function(struct hash_entry *entry) +{ + bool *deleted = (bool *)entry->data; + assert(!*deleted); + *deleted = true; +} + +int main() +{ + struct hash_table *ht; + struct hash_entry *entry; + const uint32_t size = 1000; + bool flags[size]; + uint32_t i; + + ht = _mesa_hash_table_create(NULL, key_hash, key_equal); + + for (i = 0; i < size; ++i) { + flags[i] = false; + _mesa_hash_table_insert(ht, make_key(i), &flags[i]); + } + + _mesa_hash_table_clear(ht, delete_function); + assert(_mesa_hash_table_next_entry(ht, NULL) == NULL); + + /* Check that delete_function was called and that repopulating the table + * works. */ + for (i = 0; i < size; ++i) { + assert(flags[i]); + flags[i] = false; + _mesa_hash_table_insert(ht, make_key(i), &flags[i]); + } + + /* Check that exactly the right set of entries is in the table. */ + for (i = 0; i < size; ++i) { + assert(_mesa_hash_table_search(ht, make_key(i))); + } + + hash_table_foreach(ht, entry) { + assert(key_id(entry->key) < size); + } + + _mesa_hash_table_destroy(ht, NULL); + + return 0; +} |