diff options
358 files changed, 12445 insertions, 4764 deletions
diff --git a/Android.common.mk b/Android.common.mk index 948561c6b1b..c4823f7dbf1 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -21,13 +21,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -# use c99 compiler by default -ifeq ($(LOCAL_CC),) ifeq ($(LOCAL_IS_HOST_MODULE),true) -LOCAL_CC := $(HOST_CC) -std=c99 -D_GNU_SOURCE -else -LOCAL_CC := $(TARGET_CC) -std=c99 -endif +LOCAL_CFLAGS += -D_GNU_SOURCE endif LOCAL_C_INCLUDES += \ @@ -37,6 +32,7 @@ LOCAL_C_INCLUDES += \ MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) # define ANDROID_VERSION (e.g., 4.0.x => 0x0400) LOCAL_CFLAGS += \ + -Wno-unused-parameter \ -DPACKAGE_VERSION=\"$(MESA_VERSION)\" \ -DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \ -DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION) @@ -60,6 +56,10 @@ LOCAL_CFLAGS += \ -fvisibility=hidden \ -Wno-sign-compare +# mesa requires at least c99 compiler +LOCAL_CONLYFLAGS += \ + -std=c99 + ifeq ($(strip $(MESA_ENABLE_ASM)),true) ifeq ($(TARGET_ARCH),x86) LOCAL_CFLAGS += \ @@ -1 +1 @@ -11.2.0-devel +11.3.0-devel diff --git a/configure.ac b/configure.ac index b4e2539e66f..5f686f5602a 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63 LIBDRM_INTEL_REQUIRED=2.4.61 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 -LIBDRM_FREEDRENO_REQUIRED=2.4.65 +LIBDRM_FREEDRENO_REQUIRED=2.4.67 DRI2PROTO_REQUIRED=2.6 DRI3PROTO_REQUIRED=1.0 PRESENTPROTO_REQUIRED=1.0 @@ -2297,6 +2297,9 @@ dnl in LLVM_LIBS. if test "x$MESA_LLVM" != x0; then + if ! 
$LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then + AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed]) + fi LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`" dnl llvm-config may not give the right answer when llvm is a built as a diff --git a/docs/GL3.txt b/docs/GL3.txt index e2aa52cc388..8c043e9b5ff 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -215,10 +215,10 @@ GLES3.1, GLSL ES 3.1 GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_program_interface_query DONE (all drivers) - GL_ARB_shader_atomic_counters DONE (i965) + GL_ARB_shader_atomic_counters DONE (i965, nvc0) GL_ARB_shader_image_load_store DONE (i965) GL_ARB_shader_image_size DONE (i965) - GL_ARB_shader_storage_buffer_object DONE (i965) + GL_ARB_shader_storage_buffer_object DONE (i965, nvc0) GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) @@ -249,14 +249,14 @@ GLES3.2, GLSL ES 3.2 GL_OES_primitive_bounding box not started GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers) GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers) - GL_OES_shader_image_atomic not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers) + GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store) GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done) GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done) GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers) - GL_OES_texture_border_clamp not started (based on GL_ARB_texture_border_clamp, which is done) + GL_OES_texture_border_clamp DONE (all drivers) 
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done) GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers) - GL_OES_texture_stencil8 not started (based on GL_ARB_texture_stencil8, which is done for some drivers) + GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8) GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample) More info about these features and the work involved can be found at diff --git a/docs/envvars.html b/docs/envvars.html index ba83335d0b0..06aa0ac9369 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -91,6 +91,14 @@ This is only valid for versions >= 3.0. <li> Mesa may not really implement all the features of the given version. (for developers only) </ul> +<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by +glGetString(GL_VERSION) for OpenGL ES. +<ul> +<li> The format should be MAJOR.MINOR +<li> Examples: 2.0, 3.0, 3.1 +<li> Mesa may not really implement all the features of the given version. +(for developers only) +</ul> <li>MESA_GLSL_VERSION_OVERRIDE - changes the value returned by glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as "130". Mesa will not really implement all the features of the given language version @@ -224,7 +232,7 @@ See src/mesa/state_tracker/st_debug.c for other options. <li>LP_PERF - a comma-separated list of options to selectively no-op various parts of the driver. See the source code for details. <li>LP_NUM_THREADS - an integer indicating how many threads to use for rendering. - Zero turns of threading completely. The default value is the number of CPU + Zero turns off threading completely. The default value is the number of CPU cores present. </ul> @@ -245,6 +253,25 @@ for details. 
</ul> +<h3>VC4 driver environment variables</h3> +<ul> +<li>VC4_DEBUG - a comma-separated list of named flags, which do various things: +<ul> + <li>cl - dump command list during creation</li> + <li>qpu - dump generated QPU instructions</li> + <li>qir - dump QPU IR during program compile</li> + <li>nir - dump NIR during program compile</li> + <li>tgsi - dump TGSI during program compile</li> + <li>shaderdb - dump program compile information for shader-db analysis</li> + <li>perf - print during performance-related events</li> + <li>norast - skip actual hardware execution of commands</li> + <li>always_flush - flush after each draw call</li> + <li>always_sync - wait for finish after each flush</li> + <li>dump - write a GPU command stream trace file (VC4 simulator only)</li> +</ul> +</ul> + + <p> Other Gallium drivers have their own environment variables. These may change frequently so the source code should be consulted for details. diff --git a/docs/install.html b/docs/install.html index c826d642001..ae911d5347b 100644 --- a/docs/install.html +++ b/docs/install.html @@ -58,6 +58,9 @@ On Windows with MinGW, install flex and bison with: For MSVC on Windows, install <a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>. </li> +<br> +<li>For building on Windows, Microsoft Visual Studio 2013 or later is required. +</li> </ul> diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html index 069eca2b70c..baedb4ca756 100644 --- a/docs/relnotes/11.2.0.html +++ b/docs/relnotes/11.2.0.html @@ -74,6 +74,10 @@ TBD. <h2>Changes</h2> +Microsoft Visual Studio 2013 or later is now required for building +on Windows. +Previously, Visual Studio 2008 and later were supported. + TBD. 
</div> diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html new file mode 100644 index 00000000000..fa650830e23 --- /dev/null +++ b/docs/relnotes/11.3.0.html @@ -0,0 +1,61 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.3.0 Release Notes / TBD</h1> + +<p> +Mesa 11.3.0 is a new development release. +People who are concerned with stability and reliability should stick +with a previous release or wait for Mesa 11.3.1. +</p> +<p> +Mesa 11.3.0 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD. +</pre> + + +<h2>New features</h2> + +<p> +Note: some of the new features are only available with certain drivers. +</p> + +<ul> +<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li> +<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li> +</ul> + +<h2>Bug fixes</h2> + +TBD. + +<h2>Changes</h2> + +TBD. 
+ +</div> +</body> +</html> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 5139e279bcc..bdfbefe0b75 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -112,6 +112,7 @@ CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3") CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1") +CHIPSET(0x190B, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1") CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)") CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f") @@ -122,16 +123,17 @@ CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2") CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)") CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)") CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)") -CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2") -CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)") -CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)") +CHIPSET(0x1921, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)") +CHIPSET(0x1923, skl_gt3, "Intel(R) Skylake GT3e") +CHIPSET(0x1926, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)") CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)") CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4") -CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)") -CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4") -CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4") -CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4") -CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics 555 (Skylake GT3e)") +CHIPSET(0x192D, skl_gt3, "Intel(R) Iris Graphics P555 (Skylake GT3e)") +CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") +CHIPSET(0x193A, skl_gt4, "Intel(R) 
Iris Pro Graphics P580 (Skylake GT4e)") +CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") +CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)") CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") diff --git a/src/compiler/builtin_type_macros.h b/src/compiler/builtin_type_macros.h index 7bd2e4e6558..da3f19e7ab2 100644 --- a/src/compiler/builtin_type_macros.h +++ b/src/compiler/builtin_type_macros.h @@ -78,6 +78,7 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3) DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4) DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4) +DECL_TYPE(sampler, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID) DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT) diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h index 03df6c08b2b..9aa5bb99f49 100644 --- a/src/compiler/glsl/ast.h +++ b/src/compiler/glsl/ast.h @@ -685,18 +685,6 @@ struct ast_type_qualifier { */ bool has_auxiliary_storage() const; - /** - * \brief Return string representation of interpolation qualifier. - * - * If an interpolation qualifier is present, then return that qualifier's - * string representation. Otherwise, return null. For example, if the - * noperspective bit is set, then this returns "noperspective". - * - * If multiple interpolation qualifiers are somehow present, then the - * returned string is undefined but not null. 
- */ - const char *interpolation_string() const; - bool merge_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, const ast_type_qualifier &q, diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp index c7fdcb24379..1a440203cfc 100644 --- a/src/compiler/glsl/ast_function.cpp +++ b/src/compiler/glsl/ast_function.cpp @@ -1405,9 +1405,9 @@ emit_inline_matrix_constructor(const glsl_type *type, zero.d[i] = 0.0; ir_instruction *inst = - new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), - new(ctx) ir_constant(rhs_var->type, &zero), - NULL); + new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), + new(ctx) ir_constant(rhs_var->type, &zero), + NULL); instructions->push_tail(inst); ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); @@ -1422,36 +1422,36 @@ emit_inline_matrix_constructor(const glsl_type *type, * columns than rows). */ static const unsigned rhs_swiz[4][4] = { - { 0, 1, 1, 1 }, - { 1, 0, 1, 1 }, - { 1, 1, 0, 1 }, - { 1, 1, 1, 0 } + { 0, 1, 1, 1 }, + { 1, 0, 1, 1 }, + { 1, 1, 0, 1 }, + { 1, 1, 1, 0 } }; const unsigned cols_to_init = MIN2(type->matrix_columns, - type->vector_elements); + type->vector_elements); for (unsigned i = 0; i < cols_to_init; i++) { - ir_constant *const col_idx = new(ctx) ir_constant(i); - ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); + ir_constant *const col_idx = new(ctx) ir_constant(i); + ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); - ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i], - type->vector_elements); + ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i], + type->vector_elements); - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); - instructions->push_tail(inst); + inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + 
instructions->push_tail(inst); } for (unsigned i = cols_to_init; i < type->matrix_columns; i++) { - ir_constant *const col_idx = new(ctx) ir_constant(i); - ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); + ir_constant *const col_idx = new(ctx) ir_constant(i); + ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); - ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1, - type->vector_elements); + ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1, + type->vector_elements); - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); - instructions->push_tail(inst); + inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + instructions->push_tail(inst); } } else if (first_param->type->is_matrix()) { /* From page 50 (56 of the PDF) of the GLSL 1.50 spec: @@ -1469,36 +1469,43 @@ emit_inline_matrix_constructor(const glsl_type *type, /* If the source matrix is smaller, pre-initialize the relavent parts of * the destination matrix to the identity matrix. */ - if ((src_matrix->type->matrix_columns < var->type->matrix_columns) - || (src_matrix->type->vector_elements < var->type->vector_elements)) { - - /* If the source matrix has fewer rows, every column of the destination - * must be initialized. Otherwise only the columns in the destination - * that do not exist in the source must be initialized. - */ - unsigned col = - (src_matrix->type->vector_elements < var->type->vector_elements) - ? 
0 : src_matrix->type->matrix_columns; - - const glsl_type *const col_type = var->type->column_type(); - for (/* empty */; col < var->type->matrix_columns; col++) { - ir_constant_data ident; + if ((src_matrix->type->matrix_columns < var->type->matrix_columns) || + (src_matrix->type->vector_elements < var->type->vector_elements)) { - ident.f[0] = 0.0; - ident.f[1] = 0.0; - ident.f[2] = 0.0; - ident.f[3] = 0.0; - - ident.f[col] = 1.0; + /* If the source matrix has fewer rows, every column of the destination + * must be initialized. Otherwise only the columns in the destination + * that do not exist in the source must be initialized. + */ + unsigned col = + (src_matrix->type->vector_elements < var->type->vector_elements) + ? 0 : src_matrix->type->matrix_columns; + + const glsl_type *const col_type = var->type->column_type(); + for (/* empty */; col < var->type->matrix_columns; col++) { + ir_constant_data ident; + + if (!col_type->is_double()) { + ident.f[0] = 0.0f; + ident.f[1] = 0.0f; + ident.f[2] = 0.0f; + ident.f[3] = 0.0f; + ident.f[col] = 1.0f; + } else { + ident.d[0] = 0.0; + ident.d[1] = 0.0; + ident.d[2] = 0.0; + ident.d[3] = 0.0; + ident.d[col] = 1.0; + } - ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident); + ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident); - ir_rvalue *const lhs = - new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col)); + ir_rvalue *const lhs = + new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col)); - ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL); - instructions->push_tail(inst); - } + ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL); + instructions->push_tail(inst); + } } /* Assign columns from the source matrix to the destination matrix. @@ -1507,51 +1514,51 @@ emit_inline_matrix_constructor(const glsl_type *type, * generate a temporary and copy the paramter there. 
*/ ir_variable *const rhs_var = - new(ctx) ir_variable(first_param->type, "mat_ctor_mat", - ir_var_temporary); + new(ctx) ir_variable(first_param->type, "mat_ctor_mat", + ir_var_temporary); instructions->push_tail(rhs_var); ir_dereference *const rhs_var_ref = - new(ctx) ir_dereference_variable(rhs_var); + new(ctx) ir_dereference_variable(rhs_var); ir_instruction *const inst = - new(ctx) ir_assignment(rhs_var_ref, first_param, NULL); + new(ctx) ir_assignment(rhs_var_ref, first_param, NULL); instructions->push_tail(inst); const unsigned last_row = MIN2(src_matrix->type->vector_elements, - var->type->vector_elements); + var->type->vector_elements); const unsigned last_col = MIN2(src_matrix->type->matrix_columns, - var->type->matrix_columns); + var->type->matrix_columns); unsigned swiz[4] = { 0, 0, 0, 0 }; for (unsigned i = 1; i < last_row; i++) - swiz[i] = i; + swiz[i] = i; - const unsigned write_mask = (1U << last_row) - 1; + const unsigned write_mask = (1U << last_row) - 1; for (unsigned i = 0; i < last_col; i++) { - ir_dereference *const lhs = - new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); - ir_rvalue *const rhs_col = - new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i)); - - /* If one matrix has columns that are smaller than the columns of the - * other matrix, wrap the column access of the larger with a swizzle - * so that the LHS and RHS of the assignment have the same size (and - * therefore have the same type). - * - * It would be perfectly valid to unconditionally generate the - * swizzles, this this will typically result in a more compact IR tree. 
- */ - ir_rvalue *rhs; - if (lhs->type->vector_elements != rhs_col->type->vector_elements) { - rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row); - } else { - rhs = rhs_col; - } + ir_dereference *const lhs = + new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); + ir_rvalue *const rhs_col = + new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i)); + + /* If one matrix has columns that are smaller than the columns of the + * other matrix, wrap the column access of the larger with a swizzle + * so that the LHS and RHS of the assignment have the same size (and + * therefore have the same type). + * + * It would be perfectly valid to unconditionally generate the + * swizzles, this this will typically result in a more compact IR tree. + */ + ir_rvalue *rhs; + if (lhs->type->vector_elements != rhs_col->type->vector_elements) { + rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row); + } else { + rhs = rhs_col; + } - ir_instruction *inst = - new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); - instructions->push_tail(inst); + ir_instruction *inst = + new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); + instructions->push_tail(inst); } } else { const unsigned cols = type->matrix_columns; diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 9e811661a2e..75abef6a8f9 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -1133,9 +1133,9 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: /* I assume a comparison of a struct containing a sampler just * ignores the sampler present in the type. */ @@ -2268,7 +2268,7 @@ get_type_name_for_precision_qualifier(const glsl_type *type) type->sampler_array + 2 * type->sampler_shadow; const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 
0 : 4; assert(type_idx < 4); - switch (type->sampler_type) { + switch (type->sampled_type) { case GLSL_TYPE_FLOAT: switch (type->sampler_dimensionality) { case GLSL_SAMPLER_DIM_1D: { @@ -2750,6 +2750,17 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, "vertex shader inputs or fragment shader outputs", interpolation_string(interpolation)); } + } else if (state->es_shader && + ((mode == ir_var_shader_in && + state->stage != MESA_SHADER_VERTEX) || + (mode == ir_var_shader_out && + state->stage != MESA_SHADER_FRAGMENT))) { + /* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: + * + * "When no interpolation qualifier is present, smooth interpolation + * is used." + */ + interpolation = INTERP_QUALIFIER_SMOOTH; } return interpolation; @@ -2954,7 +2965,7 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, "used on image function parameters"); } - if (qual->image_base_type != base_type->sampler_type) { + if (qual->image_base_type != base_type->sampled_type) { _mesa_glsl_error(loc, state, "format qualifier doesn't match the " "base data type of the image"); } @@ -4679,8 +4690,7 @@ ast_declarator_list::hir(exec_list *instructions, && this->type->qualifier.has_interpolation() && this->type->qualifier.flags.q.varying) { - const char *i = this->type->qualifier.interpolation_string(); - assert(i != NULL); + const char *i = interpolation_string(var->data.interpolation); const char *s; if (this->type->qualifier.flags.q.centroid) s = "centroid varying"; @@ -4710,9 +4720,7 @@ ast_declarator_list::hir(exec_list *instructions, if (state->is_version(130, 300) && this->type->qualifier.has_interpolation()) { - const char *i = this->type->qualifier.interpolation_string(); - assert(i != NULL); - + const char *i = interpolation_string(var->data.interpolation); switch (state->stage) { case MESA_SHADER_VERTEX: if (this->type->qualifier.flags.q.in) { @@ -6259,7 +6267,7 @@ ast_process_struct_or_iface_block_members(exec_list 
*instructions, decl_count); bool first_member = true; - bool first_member_has_explicit_location; + bool first_member_has_explicit_location = false; unsigned i = 0; foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp index e0e331152dd..dcd83efa6ff 100644 --- a/src/compiler/glsl/ast_type.cpp +++ b/src/compiler/glsl/ast_type.cpp @@ -102,19 +102,6 @@ ast_type_qualifier::has_auxiliary_storage() const || this->flags.q.patch; } -const char* -ast_type_qualifier::interpolation_string() const -{ - if (this->flags.q.smooth) - return "smooth"; - else if (this->flags.q.flat) - return "flat"; - else if (this->flags.q.noperspective) - return "noperspective"; - else - return NULL; -} - /** * This function merges both duplicate identifies within a single layout and * multiple layout qualifiers on a single variable declaration. The diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 5512a33f114..bbb237a102c 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -448,8 +448,16 @@ shader_image_load_store(const _mesa_glsl_parse_state *state) static bool shader_image_atomic(const _mesa_glsl_parse_state *state) { - return (state->is_version(420, 0) || - state->ARB_shader_image_load_store_enable); + return (state->is_version(420, 320) || + state->ARB_shader_image_load_store_enable || + state->OES_shader_image_atomic_enable); +} + +static bool +shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(450, 320) || + state->OES_shader_image_atomic_enable); } static bool @@ -577,17 +585,6 @@ private: unsigned num_arguments, unsigned flags); - enum image_function_flags { - IMAGE_FUNCTION_EMIT_STUB = (1 << 0), - IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), - IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), - IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), - 
IMAGE_FUNCTION_READ_ONLY = (1 << 4), - IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), - IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6), - IMAGE_FUNCTION_MS_ONLY = (1 << 7), - }; - /** * Create a new image built-in function for all known image types. * \p flags is a bitfield of \c image_function_flags flags. @@ -836,6 +833,18 @@ private: /** @} */ }; +enum image_function_flags { + IMAGE_FUNCTION_EMIT_STUB = (1 << 0), + IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), + IMAGE_FUNCTION_READ_ONLY = (1 << 4), + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), + IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6), + IMAGE_FUNCTION_MS_ONLY = (1 << 7), + IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE = (1 << 8) +}; + } /* anonymous namespace */ /** @@ -2921,7 +2930,7 @@ builtin_builder::add_image_function(const char *name, ir_function *f = new(mem_ctx) ir_function(name); for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { - if ((types[i]->sampler_type != GLSL_TYPE_FLOAT || + if ((types[i]->sampled_type != GLSL_TYPE_FLOAT || (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) && (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS || !(flags & IMAGE_FUNCTION_MS_ONLY))) @@ -2981,7 +2990,9 @@ builtin_builder::add_image_functions(bool glsl) add_image_function((glsl ? "imageAtomicExchange" : "__intrinsic_image_atomic_exchange"), "__intrinsic_image_atomic_exchange", - &builtin_builder::_image_prototype, 1, atom_flags); + &builtin_builder::_image_prototype, 1, + (flags | IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)); add_image_function((glsl ? 
"imageAtomicCompSwap" : "__intrinsic_image_atomic_comp_swap"), @@ -5232,13 +5243,28 @@ builtin_builder::_mid3(const glsl_type *type) return sig; } +static builtin_available_predicate +get_image_available_predicate(const glsl_type *type, unsigned flags) +{ + if ((flags & IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE) && + type->sampled_type == GLSL_TYPE_FLOAT) + return shader_image_atomic_exchange_float; + + else if (flags & (IMAGE_FUNCTION_AVAIL_ATOMIC_EXCHANGE | + IMAGE_FUNCTION_AVAIL_ATOMIC)) + return shader_image_atomic; + + else + return shader_image_load_store; +} + ir_function_signature * builtin_builder::_image_prototype(const glsl_type *image_type, unsigned num_arguments, unsigned flags) { const glsl_type *data_type = glsl_type::get_instance( - image_type->sampler_type, + image_type->sampled_type, (flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1), 1); const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ? @@ -5249,10 +5275,9 @@ builtin_builder::_image_prototype(const glsl_type *image_type, ir_variable *coord = in_var( glsl_type::ivec(image_type->coordinate_components()), "coord"); - const builtin_available_predicate avail = - (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic : - shader_image_load_store); - ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord); + ir_function_signature *sig = new_sig( + ret_type, get_image_available_predicate(image_type, flags), + 2, image, coord); /* Sample index for multisample images. 
*/ if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index 6db74f1c634..d20fc4a816c 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -770,11 +770,16 @@ builtin_variable_generator::generate_constants() } if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { - add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); - add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); - add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); - add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS); - add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS); + add_const("gl_MaxComputeAtomicCounterBuffers", + state->Const.MaxComputeAtomicCounterBuffers); + add_const("gl_MaxComputeAtomicCounters", + state->Const.MaxComputeAtomicCounters); + add_const("gl_MaxComputeImageUniforms", + state->Const.MaxComputeImageUniforms); + add_const("gl_MaxComputeTextureImageUnits", + state->Const.MaxComputeTextureImageUnits); + add_const("gl_MaxComputeUniformComponents", + state->Const.MaxComputeUniformComponents); add_const_ivec3("gl_MaxComputeWorkGroupCount", state->Const.MaxComputeWorkGroupCount[0], diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 43a1aa94aff..b03e1910758 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2096,6 +2096,9 @@ _check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc, if (strncmp(identifier, "GL_", 3) == 0) { glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n"); } + if (strcmp(identifier, "defined") == 0) { + glcpp_error (loc, parser, "\"defined\" cannot be used as a macro name"); + } } static int @@ -2388,6 +2391,9 @@ 
_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); if (version >= 310) { + if (extensions->ARB_shader_image_load_store) + add_builtin_define(parser, "GL_OES_shader_image_atomic", 1); + if (extensions->OES_geometry_shader) { add_builtin_define(parser, "GL_OES_geometry_point_size", 1); add_builtin_define(parser, "GL_OES_geometry_shader", 1); diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index e59f93e10ef..9704fc7ac4f 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -113,11 +113,7 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state, if (base == 16) digits += 2; -#ifdef _MSC_VER - unsigned __int64 value = _strtoui64(digits, NULL, base); -#else unsigned long long value = strtoull(digits, NULL, base); -#endif lval->n = (int)value; diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 86cf091b4fe..8ccbefc3f71 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -120,6 +120,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters; this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters; this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; + this->Const.MaxComputeAtomicCounters = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters; this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; this->Const.MaxVertexAtomicCounterBuffers = @@ -132,6 +133,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; 
this->Const.MaxFragmentAtomicCounterBuffers = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxComputeAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers; this->Const.MaxCombinedAtomicCounterBuffers = ctx->Const.MaxCombinedAtomicBuffers; this->Const.MaxAtomicCounterBufferSize = @@ -143,6 +146,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++) this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; + this->Const.MaxComputeTextureImageUnits = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; + this->Const.MaxComputeUniformComponents = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents; + this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; @@ -151,6 +157,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms; this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms; this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms; + this->Const.MaxComputeImageUniforms = ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms; this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms; /* ARB_viewport_array */ @@ -601,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), EXT(OES_geometry_point_size, false, true, OES_geometry_shader), EXT(OES_geometry_shader, false, true, OES_geometry_shader), + EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store), EXT(OES_standard_derivatives, false, true, 
OES_standard_derivatives), EXT(OES_texture_3D, false, true, dummy_true), EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), @@ -946,27 +954,11 @@ _mesa_ast_process_interface_block(YYLTYPE *locp, "the interface block"); } - /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": - * - * "GLSL ES 3.0 does not support interface blocks for shader inputs or - * outputs." - * - * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. - * - * "Only variables output from a shader can be candidates for - * invariance." - * - * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": - * - * "If optional qualifiers are used, they can include interpolation - * qualifiers, auxiliary storage qualifiers, and storage qualifiers - * and they must declare an input, output, or uniform member - * consistent with the interface qualifier of the block" - */ - if (qualifier.flags.q.invariant) + if (!(q.flags.q.in || q.flags.q.out) && qualifier.flags.q.invariant) _mesa_glsl_error(locp, state, - "invariant qualifiers cannot be used " - "with interface blocks members"); + "invariant qualifiers can be used only " + "in interface block members for shader " + "inputs or outputs"); } } diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 4dacc2ac62b..86ec057f288 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -422,6 +422,11 @@ struct _mesa_glsl_parse_state { unsigned MaxAtomicCounterBufferSize; /* ARB_compute_shader */ + unsigned MaxComputeAtomicCounterBuffers; + unsigned MaxComputeAtomicCounters; + unsigned MaxComputeImageUniforms; + unsigned MaxComputeTextureImageUnits; + unsigned MaxComputeUniformComponents; unsigned MaxComputeWorkGroupCount[3]; unsigned MaxComputeWorkGroupSize[3]; @@ -588,6 +593,8 @@ struct _mesa_glsl_parse_state { bool OES_geometry_point_size_warn; bool OES_geometry_shader_enable; bool OES_geometry_shader_warn; + bool 
OES_shader_image_atomic_enable; + bool OES_shader_image_atomic_warn; bool OES_standard_derivatives_enable; bool OES_standard_derivatives_warn; bool OES_texture_3D_enable; diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 5debca32411..750f61744e7 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -1442,7 +1442,7 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); } else { - assert(sampler->type->sampler_type == (int) type->base_type); + assert(sampler->type->sampled_type == (int) type->base_type); if (sampler->type->sampler_shadow) assert(type->vector_elements == 4 || type->vector_elements == 1); else @@ -1696,21 +1696,6 @@ interpolation_string(unsigned interpolation) return ""; } - -glsl_interp_qualifier -ir_variable::determine_interpolation_mode(bool flat_shade) -{ - if (this->data.interpolation != INTERP_QUALIFIER_NONE) - return (glsl_interp_qualifier) this->data.interpolation; - int location = this->data.location; - bool is_gl_Color = - location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; - if (flat_shade && is_gl_Color) - return INTERP_QUALIFIER_FLAT; - else - return INTERP_QUALIFIER_SMOOTH; -} - const char *const ir_variable::warn_extension_table[] = { "", "GL_ARB_shader_stencil_export", diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index bf9b7caffae..93c893d36fe 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -432,17 +432,6 @@ public: /** - * Determine how this variable should be interpolated based on its - * interpolation qualifier (if present), whether it is gl_Color or - * gl_SecondaryColor, and whether flatshading is enabled in the current GL - * state. - * - * The return value will always be either INTERP_QUALIFIER_SMOOTH, - * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT. 
- */ - glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); - - /** * Determine whether or not a variable is part of a uniform or * shader storage block. */ diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp index b32ec17f1af..43ffffb0a38 100644 --- a/src/compiler/glsl/ir_clone.cpp +++ b/src/compiler/glsl/ir_clone.cpp @@ -366,7 +366,6 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const return c; } - case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: @@ -374,6 +373,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const case GLSL_TYPE_ERROR: case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Should not get here."); break; } diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index cdc1d3ac7be..3609f81771e 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -88,9 +88,9 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ERROR: /* All other types should have already been filtered by other * paths in the caller. diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 7072c16cb28..deaba94df1c 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -649,15 +649,15 @@ private: current_var->data.image_write_only ? 
GL_WRITE_ONLY : GL_READ_WRITE); - for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j) - prog->_LinkedShaders[shader_type]-> - ImageAccess[this->next_image + j] = access; + const unsigned first = this->next_image; /* Increment the image index by 1 for non-arrays and by the * number of array elements for arrays. */ this->next_image += MAX2(1, uniform->array_elements); + for (unsigned i = first; i < MIN2(next_image, MAX_IMAGE_UNIFORMS); i++) + prog->_LinkedShaders[shader_type]->ImageAccess[i] = access; } } @@ -1038,9 +1038,43 @@ assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id, uniform_size->map->put(hidden_uniform_start + hidden_id, name); } +/** + * Search through the list of empty blocks to find one that fits the current + * uniform. + */ +static int +find_empty_block(struct gl_shader_program *prog, + struct gl_uniform_storage *uniform) +{ + const unsigned entries = MAX2(1, uniform->array_elements); + + foreach_list_typed(struct empty_uniform_block, block, link, + &prog->EmptyUniformLocations) { + /* Found a block with enough slots to fit the uniform */ + if (block->slots == entries) { + unsigned start = block->start; + exec_node_remove(&block->link); + ralloc_free(block); + + return start; + /* Found a block with more slots than needed. It can still be used. 
*/ + } else if (block->slots > entries) { + unsigned start = block->start; + block->start += entries; + block->slots -= entries; + + return start; + } + } + + return -1; +} + void link_assign_uniform_locations(struct gl_shader_program *prog, - unsigned int boolean_true) + unsigned int boolean_true, + unsigned int num_explicit_uniform_locs, + unsigned int max_uniform_locs) { ralloc_free(prog->UniformStorage); prog->UniformStorage = NULL; @@ -1131,6 +1165,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog, parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data); + unsigned total_entries = num_explicit_uniform_locs; + unsigned empty_locs = prog->NumUniformRemapTable - num_explicit_uniform_locs; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] == NULL) continue; @@ -1194,21 +1231,44 @@ link_assign_uniform_locations(struct gl_shader_program *prog, /* how many new entries for this uniform? */ const unsigned entries = MAX2(1, uniforms[i].array_elements); - /* resize remap table to fit new entries */ - prog->UniformRemapTable = - reralloc(prog, - prog->UniformRemapTable, - gl_uniform_storage *, - prog->NumUniformRemapTable + entries); + /* Find UniformRemapTable for empty blocks where we can fit this uniform. */ + int chosen_location = -1; + + if (empty_locs) + chosen_location = find_empty_block(prog, &uniforms[i]); + + /* Add new entries to the total amount of entries. 
*/ + total_entries += entries; + + if (chosen_location != -1) { + empty_locs -= entries; + } else { + chosen_location = prog->NumUniformRemapTable; + + /* resize remap table to fit new entries */ + prog->UniformRemapTable = + reralloc(prog, + prog->UniformRemapTable, + gl_uniform_storage *, + prog->NumUniformRemapTable + entries); + prog->NumUniformRemapTable += entries; + } /* set pointers for this uniform */ for (unsigned j = 0; j < entries; j++) - prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i]; + prog->UniformRemapTable[chosen_location + j] = &uniforms[i]; /* set the base location in remap table for the uniform */ - uniforms[i].remap_location = prog->NumUniformRemapTable; + uniforms[i].remap_location = chosen_location; + } + + /* Verify that total amount of entries for explicit and implicit locations + * is less than MAX_UNIFORM_LOCATIONS. + */ - prog->NumUniformRemapTable += entries; + if (total_entries > max_uniform_locs) { + linker_error(prog, "count of uniform locations > MAX_UNIFORM_LOCATIONS" + "(%u > %u)", total_entries, max_uniform_locs); } /* Reserve all the explicit locations of the active subroutine uniforms. */ diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 590de174507..05cc1a2b7f8 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -1739,22 +1739,7 @@ assign_varying_locations(struct gl_context *ctx, if (var && var->data.mode == ir_var_shader_in && var->data.is_unmatched_generic_inout) { - if (prog->IsES) { - /* - * On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec: - * - * If the vertex shader declares but doesn't write to a - * varying and the fragment shader declares and reads it, - * is this an error? - * - * RESOLUTION: No. 
- */ - linker_warning(prog, "%s shader varying %s not written " - "by %s shader\n.", - _mesa_shader_stage_to_string(consumer->Stage), - var->name, - _mesa_shader_stage_to_string(producer->Stage)); - } else if (prog->Version <= 120) { + if (!prog->IsES && prog->Version <= 120) { /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: * * Only those varying variables used (i.e. read) in @@ -1772,6 +1757,12 @@ assign_varying_locations(struct gl_context *ctx, _mesa_shader_stage_to_string(consumer->Stage), var->name, _mesa_shader_stage_to_string(producer->Stage)); + } else { + linker_warning(prog, "%s shader varying %s not written " + "by %s shader\n.", + _mesa_shader_stage_to_string(consumer->Stage), + var->name, + _mesa_shader_stage_to_string(producer->Stage)); } } } diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index bad1c1742b7..5326bfd4d68 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3008,12 +3008,13 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) * for a variable, checks for overlaps between other uniforms using explicit * locations. */ -static bool +static int reserve_explicit_locations(struct gl_shader_program *prog, string_to_uint_map *map, ir_variable *var) { unsigned slots = var->type->uniform_locations(); unsigned max_loc = var->data.location + slots - 1; + unsigned return_value = slots; /* Resize remap table if locations do not fit in the current one. */ if (max_loc + 1 > prog->NumUniformRemapTable) { @@ -3024,7 +3025,7 @@ reserve_explicit_locations(struct gl_shader_program *prog, if (!prog->UniformRemapTable) { linker_error(prog, "Out of memory during linking.\n"); - return false; + return -1; } /* Initialize allocated space. */ @@ -3042,8 +3043,10 @@ reserve_explicit_locations(struct gl_shader_program *prog, /* Possibly same uniform from a different stage, this is ok. 
*/ unsigned hash_loc; - if (map->get(hash_loc, var->name) && hash_loc == loc - i) - continue; + if (map->get(hash_loc, var->name) && hash_loc == loc - i) { + return_value = 0; + continue; + } /* ARB_explicit_uniform_location specification states: * @@ -3055,7 +3058,7 @@ reserve_explicit_locations(struct gl_shader_program *prog, "location qualifier for uniform %s overlaps " "previously used location\n", var->name); - return false; + return -1; } /* Initialize location as inactive before optimization @@ -3067,7 +3070,7 @@ reserve_explicit_locations(struct gl_shader_program *prog, /* Note, base location used for arrays. */ map->put(var->data.location, var->name); - return true; + return return_value; } static bool @@ -3128,12 +3131,12 @@ reserve_subroutine_explicit_locations(struct gl_shader_program *prog, * any optimizations happen to handle also inactive uniforms and * inactive array elements that may get trimmed away. */ -static void +static int check_explicit_uniform_locations(struct gl_context *ctx, struct gl_shader_program *prog) { if (!ctx->Extensions.ARB_explicit_uniform_location) - return; + return -1; /* This map is used to detect if overlapping explicit locations * occur with the same uniform (from different stage) or a different one. 
@@ -3142,7 +3145,7 @@ check_explicit_uniform_locations(struct gl_context *ctx, if (!uniform_map) { linker_error(prog, "Out of memory during linking.\n"); - return; + return -1; } unsigned entries_total = 0; @@ -3157,31 +3160,47 @@ check_explicit_uniform_locations(struct gl_context *ctx, if (!var || var->data.mode != ir_var_uniform) continue; - entries_total += var->type->uniform_locations(); - if (var->data.explicit_location) { - bool ret; + bool ret = false; if (var->type->without_array()->is_subroutine()) ret = reserve_subroutine_explicit_locations(prog, sh, var); - else - ret = reserve_explicit_locations(prog, uniform_map, var); + else { + int slots = reserve_explicit_locations(prog, uniform_map, + var); + if (slots != -1) { + ret = true; + entries_total += slots; + } + } if (!ret) { delete uniform_map; - return; + return -1; } } } } - /* Verify that total amount of entries for explicit and implicit locations - * is less than MAX_UNIFORM_LOCATIONS. - */ - if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) { - linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS" - "(%u >= %u)", entries_total, - ctx->Const.MaxUserAssignableUniformLocations); + exec_list_make_empty(&prog->EmptyUniformLocations); + struct empty_uniform_block *current_block = NULL; + + for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) { + /* We found empty space in UniformRemapTable. 
*/ + if (prog->UniformRemapTable[i] == NULL) { + /* We've found the beginning of a new continous block of empty slots */ + if (!current_block || current_block->start + current_block->slots != i) { + current_block = rzalloc(prog, struct empty_uniform_block); + current_block->start = i; + exec_list_push_tail(&prog->EmptyUniformLocations, + &current_block->link); + } + + /* The current block continues, so we simply increment its slots */ + current_block->slots++; + } } + delete uniform_map; + return entries_total; } static bool @@ -4129,6 +4148,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) tfeedback_decl *tfeedback_decls = NULL; unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying; + unsigned int num_explicit_uniform_locs = 0; void *mem_ctx = ralloc_context(NULL); // temporary linker context @@ -4310,7 +4330,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) last = i; } - check_explicit_uniform_locations(ctx, prog); + num_explicit_uniform_locs = check_explicit_uniform_locations(ctx, prog); link_assign_subroutine_types(prog); if (!prog->LinkStatus) @@ -4541,7 +4561,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; update_array_sizes(prog); - link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue); + link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue, + num_explicit_uniform_locs, + ctx->Const.MaxUserAssignableUniformLocations); link_assign_atomic_counter_resources(ctx, prog); store_fragdepth_layout(prog); diff --git a/src/compiler/glsl/linker.h b/src/compiler/glsl/linker.h index c80be1c7e22..a60bb6ed087 100644 --- a/src/compiler/glsl/linker.h +++ b/src/compiler/glsl/linker.h @@ -35,7 +35,9 @@ link_invalidate_variable_locations(exec_list *ir); extern void link_assign_uniform_locations(struct gl_shader_program *prog, - unsigned int boolean_true); + unsigned int boolean_true, + unsigned int num_explicit_uniform_locs, + unsigned int max_uniform_locs);
extern void link_set_uniform_initializers(struct gl_shader_program *prog, @@ -202,4 +204,17 @@ linker_error(gl_shader_program *prog, const char *fmt, ...); void linker_warning(gl_shader_program *prog, const char *fmt, ...); +/** + * Sometimes there are empty slots left over in UniformRemapTable after we + * allocate slots to explicit locations. This struct represents a single + * continouous block of empty slots in UniformRemapTable. + */ +struct empty_uniform_block { + struct exec_node link; + /* The start location of the block */ + unsigned start; + /* The number of slots in the block */ + unsigned slots; +}; + #endif /* GLSL_LINKER_H */ diff --git a/src/compiler/glsl/lower_discard_flow.cpp b/src/compiler/glsl/lower_discard_flow.cpp index 9d0a56b230d..9e3a7c05583 100644 --- a/src/compiler/glsl/lower_discard_flow.cpp +++ b/src/compiler/glsl/lower_discard_flow.cpp @@ -62,8 +62,8 @@ public: { } + ir_visitor_status visit(ir_loop_jump *ir); ir_visitor_status visit_enter(ir_discard *ir); - ir_visitor_status visit_enter(ir_loop_jump *ir); ir_visitor_status visit_enter(ir_loop *ir); ir_visitor_status visit_enter(ir_function_signature *ir); @@ -76,7 +76,7 @@ public: } /* anonymous namespace */ ir_visitor_status -lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir) +lower_discard_flow_visitor::visit(ir_loop_jump *ir) { if (ir->mode != ir_loop_jump::jump_continue) return visit_continue; diff --git a/src/compiler/glsl/main.cpp b/src/compiler/glsl/main.cpp index df93a013ede..d2535758e1a 100644 --- a/src/compiler/glsl/main.cpp +++ b/src/compiler/glsl/main.cpp @@ -58,10 +58,16 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.MaxComputeWorkGroupSize[1] = 1024; ctx->Const.MaxComputeWorkGroupSize[2] = 64; ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.MaxComputeSharedMemorySize = 32768; ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + 
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = 8; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = 8; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = 8; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks = 12; switch (ctx->Const.GLSLVersion) { case 100: @@ -77,12 +83,14 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 128 * 4; ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 16 * 4; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ @@ -103,12 +111,14 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 512; ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; 
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 64; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ @@ -129,11 +139,13 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ @@ -153,17 +165,20 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024; + 
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ @@ -191,11 +206,13 @@ initialize_context(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents = 1024; ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents = 224; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ diff --git a/src/compiler/glsl/tests/sampler_types_test.cpp b/src/compiler/glsl/tests/sampler_types_test.cpp index 04dd65e6e8d..ef03158bba9 100644 --- a/src/compiler/glsl/tests/sampler_types_test.cpp +++ b/src/compiler/glsl/tests/sampler_types_test.cpp @@ -43,7 +43,7 @@ TEST(sampler_types, TYPE) \ const glsl_type *type = glsl_type::TYPE##_type; \ EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \ EXPECT_EQ(DIM, type->sampler_dimensionality); \ - 
EXPECT_EQ(DATA_TYPE, type->sampler_type); \ + EXPECT_EQ(DATA_TYPE, type->sampled_type); \ ARR; \ SHAD; \ EXPECT_EQ(COMPS, type->coordinate_components()); \ diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.cpp b/src/compiler/glsl/tests/uniform_initializer_utils.cpp index 5006387036f..ec64be18cb3 100644 --- a/src/compiler/glsl/tests/uniform_initializer_utils.cpp +++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp @@ -103,6 +103,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type, case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: ASSERT_TRUE(false); break; } @@ -136,6 +137,7 @@ generate_data_element(void *mem_ctx, const glsl_type *type, case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: ASSERT_TRUE(false); break; } @@ -241,6 +243,7 @@ verify_data(gl_constant_value *storage, unsigned storage_array_size, case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: ASSERT_TRUE(false); break; } diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index d2eaec173b3..c549230a83c 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -51,7 +51,7 @@ glsl_type::glsl_type(GLenum gl_type, gl_type(gl_type), base_type(base_type), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), + sampled_type(0), interface_packing(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0) { @@ -75,7 +75,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, gl_type(gl_type), base_type(base_type), sampler_dimensionality(dim), sampler_shadow(shadow), - sampler_array(array), sampler_type(type), interface_packing(0), + sampler_array(array), sampled_type(type), interface_packing(0), length(0) { mtx_lock(&glsl_type::mutex); @@ -101,7 +101,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields, gl_type(0), base_type(GLSL_TYPE_STRUCT), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), + sampled_type(0), interface_packing(0), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -141,7 +141,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, gl_type(0), base_type(GLSL_TYPE_INTERFACE), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing((unsigned) packing), + sampled_type(0), interface_packing((unsigned) packing), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -180,7 +180,7 @@ glsl_type::glsl_type(const glsl_type *return_type, gl_type(0), base_type(GLSL_TYPE_FUNCTION), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), + sampled_type(0), interface_packing(0), vector_elements(0), matrix_columns(0), length(num_params) { @@ -212,7 +212,7 @@ glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), + sampled_type(0), interface_packing(0), vector_elements(1), matrix_columns(1), length(0) { @@ -428,7 +428,7 @@ _mesa_glsl_release_types(void) glsl_type::glsl_type(const glsl_type *array, unsigned length) : base_type(GLSL_TYPE_ARRAY), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), + sampled_type(0), interface_packing(0), vector_elements(0), matrix_columns(0), length(length), name(NULL) { diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index 5965cb2eedb..2f612d8857d 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -56,11 +56,11 @@ enum glsl_base_type { GLSL_TYPE_IMAGE, GLSL_TYPE_ATOMIC_UINT, GLSL_TYPE_STRUCT, - GLSL_TYPE_FUNCTION, GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, 
GLSL_TYPE_SUBROUTINE, + GLSL_TYPE_FUNCTION, GLSL_TYPE_ERROR }; @@ -122,7 +122,7 @@ struct glsl_type { unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */ unsigned sampler_shadow:1; unsigned sampler_array:1; - unsigned sampler_type:2; /**< Type of data returned using this + unsigned sampled_type:2; /**< Type of data returned using this * sampler or image. Only \c * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, * and \c GLSL_TYPE_UINT are valid. diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index 00703fe6f52..70e9cd397fc 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -148,7 +148,7 @@ glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type) { assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); - return (glsl_base_type)type->sampler_type; + return (glsl_base_type)type->sampled_type; } unsigned @@ -315,6 +315,12 @@ glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, } const struct glsl_type * +glsl_bare_sampler_type() +{ + return glsl_type::sampler_type; +} + +const struct glsl_type * glsl_image_type(enum glsl_sampler_dim dim, bool is_array, enum glsl_base_type base_type) { @@ -331,6 +337,7 @@ glsl_function_type(const glsl_type *return_type, const glsl_type * glsl_transposed_type(const struct glsl_type *type) { + assert(glsl_type_is_matrix(type)); return glsl_type::get_instance(type->base_type, type->matrix_columns, type->vector_elements); } diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 4ef0dcf9a31..d92605bf4fb 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -113,6 +113,7 @@ const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, enum glsl_base_type base_type); +const struct glsl_type *glsl_bare_sampler_type(); const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim, bool is_array, 
enum glsl_base_type base_type); diff --git a/src/egl/Android.mk b/src/egl/Android.mk index ebd67af34cc..cf7125145ca 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -44,9 +44,8 @@ LOCAL_CFLAGS := \ -DHAVE_ANDROID_PLATFORM ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" -LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" -LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\" +LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" +LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\" else LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" endif diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 8f3abcb9867..7d546650272 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -532,7 +532,12 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy) { HAL_PIXEL_FORMAT_RGB_888, { 0xff, 0xff00, 0xff0000, 0x0 } }, { HAL_PIXEL_FORMAT_RGB_565, { 0xf800, 0x7e0, 0x1f, 0x0 } }, { HAL_PIXEL_FORMAT_BGRA_8888, { 0xff0000, 0xff00, 0xff, 0xff000000 } }, - { 0, 0, { 0, 0, 0, 0 } } + { 0, { 0, 0, 0, 0 } } + }; + EGLint config_attrs[] = { + EGL_NATIVE_VISUAL_ID, 0, + EGL_NATIVE_VISUAL_TYPE, 0, + EGL_NONE }; int count, i, j; @@ -540,6 +545,9 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy) for (i = 0; visuals[i].format; i++) { int format_count = 0; + config_attrs[1] = visuals[i].format; + config_attrs[3] = visuals[i].format; + for (j = 0; dri2_dpy->driver_configs[j]; j++) { const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT; struct dri2_egl_config *dri2_conf; @@ -553,10 +561,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy) continue; dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j], - count + 1, surface_type, NULL, visuals[i].rgba_masks); + count + 1, surface_type, config_attrs, visuals[i].rgba_masks); if (dri2_conf) { - 
dri2_conf->base.NativeVisualID = visuals[i].format; - dri2_conf->base.NativeVisualType = visuals[i].format; count++; format_count++; } diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 08cbf2d8393..420f567651c 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -472,6 +472,8 @@ dri2_x11_get_buffers(__DRIdrawable * driDrawable, dri2_surf->drawable, count, count, attachments); reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn, cookie, NULL); + if (reply == NULL) + return NULL; buffers = xcb_dri2_get_buffers_buffers (reply); if (buffers == NULL) return NULL; @@ -870,7 +872,12 @@ dri2_x11_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw); if (dri2_dpy->dri2) { - return dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1; + if (dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1) { + return EGL_TRUE; + } + /* Swap failed with a window drawable. 
*/ + _eglError(EGL_BAD_NATIVE_WINDOW, __FUNCTION__); + return EGL_FALSE; } else { assert(dri2_dpy->swrast); diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 323634e4511..32f68233aeb 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1555,8 +1555,14 @@ eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *valu static EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value) { - EGLAttrib attrib = *value; - EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib); + EGLAttrib attrib; + EGLBoolean result; + + if (!value) + RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE); + + attrib = *value; + result = eglGetSyncAttrib(dpy, sync, attribute, &attrib); /* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR: * diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 3019e6e9333..999cb480c4b 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -144,9 +144,6 @@ EGLBoolean _eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLAttrib *value) { - if (!value) - return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR"); - switch (attribute) { case EGL_SYNC_TYPE_KHR: *value = sync->Type; diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 6b29b20c53e..f0013f70472 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -69,8 +69,11 @@ struct cso_context { boolean has_geometry_shader; boolean has_tessellation; + boolean has_compute_shader; boolean has_streamout; + unsigned saved_state; /**< bitmask of CSO_BIT_x flags */ + struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; unsigned nr_fragment_views; @@ -106,6 +109,7 @@ struct cso_context { void *geometry_shader, *geometry_shader_saved; void *tessctrl_shader, *tessctrl_shader_saved; void *tesseval_shader, 
*tesseval_shader_saved; + void *compute_shader; void *velements, *velements_saved; struct pipe_query *render_condition, *render_condition_saved; uint render_condition_mode, render_condition_mode_saved; @@ -272,6 +276,15 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { ctx->has_tessellation = TRUE; } + if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { + int supported_irs = + pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_SUPPORTED_IRS); + if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + ctx->has_compute_shader = TRUE; + } + } if (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) { ctx->has_streamout = TRUE; @@ -333,6 +346,10 @@ void cso_destroy_context( struct cso_context *ctx ) ctx->pipe->bind_tes_state(ctx->pipe, NULL); ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL); } + if (ctx->has_compute_shader) { + ctx->pipe->bind_compute_state(ctx->pipe, NULL); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL); + } ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); if (ctx->has_streamout) @@ -425,13 +442,15 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, return PIPE_OK; } -void cso_save_blend(struct cso_context *ctx) +static void +cso_save_blend(struct cso_context *ctx) { assert(!ctx->blend_saved); ctx->blend_saved = ctx->blend; } -void cso_restore_blend(struct cso_context *ctx) +static void +cso_restore_blend(struct cso_context *ctx) { if (ctx->blend != ctx->blend_saved) { ctx->blend = ctx->blend_saved; @@ -488,13 +507,15 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx, return PIPE_OK; } -void cso_save_depth_stencil_alpha(struct cso_context *ctx) +static void +cso_save_depth_stencil_alpha(struct cso_context *ctx) { assert(!ctx->depth_stencil_saved); ctx->depth_stencil_saved = ctx->depth_stencil; } -void 
cso_restore_depth_stencil_alpha(struct cso_context *ctx) +static void +cso_restore_depth_stencil_alpha(struct cso_context *ctx) { if (ctx->depth_stencil != ctx->depth_stencil_saved) { ctx->depth_stencil = ctx->depth_stencil_saved; @@ -547,13 +568,15 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx, return PIPE_OK; } -void cso_save_rasterizer(struct cso_context *ctx) +static void +cso_save_rasterizer(struct cso_context *ctx) { assert(!ctx->rasterizer_saved); ctx->rasterizer_saved = ctx->rasterizer; } -void cso_restore_rasterizer(struct cso_context *ctx) +static void +cso_restore_rasterizer(struct cso_context *ctx) { if (ctx->rasterizer != ctx->rasterizer_saved) { ctx->rasterizer = ctx->rasterizer_saved; @@ -581,13 +604,15 @@ void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) ctx->pipe->delete_fs_state(ctx->pipe, handle); } -void cso_save_fragment_shader(struct cso_context *ctx) +static void +cso_save_fragment_shader(struct cso_context *ctx) { assert(!ctx->fragment_shader_saved); ctx->fragment_shader_saved = ctx->fragment_shader; } -void cso_restore_fragment_shader(struct cso_context *ctx) +static void +cso_restore_fragment_shader(struct cso_context *ctx) { if (ctx->fragment_shader_saved != ctx->fragment_shader) { ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); @@ -615,13 +640,15 @@ void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ) ctx->pipe->delete_vs_state(ctx->pipe, handle); } -void cso_save_vertex_shader(struct cso_context *ctx) +static void +cso_save_vertex_shader(struct cso_context *ctx) { assert(!ctx->vertex_shader_saved); ctx->vertex_shader_saved = ctx->vertex_shader; } -void cso_restore_vertex_shader(struct cso_context *ctx) +static void +cso_restore_vertex_shader(struct cso_context *ctx) { if (ctx->vertex_shader_saved != ctx->vertex_shader) { ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); @@ -640,12 +667,14 @@ void cso_set_framebuffer(struct cso_context *ctx, } } -void 
cso_save_framebuffer(struct cso_context *ctx) +static void +cso_save_framebuffer(struct cso_context *ctx) { util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); } -void cso_restore_framebuffer(struct cso_context *ctx) +static void +cso_restore_framebuffer(struct cso_context *ctx) { if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); @@ -664,13 +693,33 @@ void cso_set_viewport(struct cso_context *ctx, } } -void cso_save_viewport(struct cso_context *ctx) +/** + * Setup viewport state for given width and height (position is always (0,0)). + * Invert the Y axis if 'invert' is true. + */ +void +cso_set_viewport_dims(struct cso_context *ctx, + float width, float height, boolean invert) +{ + struct pipe_viewport_state vp; + vp.scale[0] = width * 0.5f; + vp.scale[1] = height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 0.5f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.5f; + cso_set_viewport(ctx, &vp); +} + +static void +cso_save_viewport(struct cso_context *ctx) { ctx->vp_saved = ctx->vp; } -void cso_restore_viewport(struct cso_context *ctx) +static void +cso_restore_viewport(struct cso_context *ctx) { if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { ctx->vp = ctx->vp_saved; @@ -696,12 +745,14 @@ void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask) } } -void cso_save_sample_mask(struct cso_context *ctx) +static void +cso_save_sample_mask(struct cso_context *ctx) { ctx->sample_mask_saved = ctx->sample_mask; } -void cso_restore_sample_mask(struct cso_context *ctx) +static void +cso_restore_sample_mask(struct cso_context *ctx) { cso_set_sample_mask(ctx, ctx->sample_mask_saved); } @@ -714,12 +765,14 @@ void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples) } } -void cso_save_min_samples(struct cso_context *ctx) +static void +cso_save_min_samples(struct cso_context *ctx) { ctx->min_samples_saved = ctx->min_samples; } -void 
cso_restore_min_samples(struct cso_context *ctx) +static void +cso_restore_min_samples(struct cso_context *ctx) { cso_set_min_samples(ctx, ctx->min_samples_saved); } @@ -733,13 +786,15 @@ void cso_set_stencil_ref(struct cso_context *ctx, } } -void cso_save_stencil_ref(struct cso_context *ctx) +static void +cso_save_stencil_ref(struct cso_context *ctx) { ctx->stencil_ref_saved = ctx->stencil_ref; } -void cso_restore_stencil_ref(struct cso_context *ctx) +static void +cso_restore_stencil_ref(struct cso_context *ctx) { if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved, sizeof(ctx->stencil_ref))) { @@ -764,14 +819,16 @@ void cso_set_render_condition(struct cso_context *ctx, } } -void cso_save_render_condition(struct cso_context *ctx) +static void +cso_save_render_condition(struct cso_context *ctx) { ctx->render_condition_saved = ctx->render_condition; ctx->render_condition_cond_saved = ctx->render_condition_cond; ctx->render_condition_mode_saved = ctx->render_condition_mode; } -void cso_restore_render_condition(struct cso_context *ctx) +static void +cso_restore_render_condition(struct cso_context *ctx) { cso_set_render_condition(ctx, ctx->render_condition_saved, ctx->render_condition_cond_saved, @@ -798,7 +855,8 @@ void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) ctx->pipe->delete_gs_state(ctx->pipe, handle); } -void cso_save_geometry_shader(struct cso_context *ctx) +static void +cso_save_geometry_shader(struct cso_context *ctx) { if (!ctx->has_geometry_shader) { return; @@ -808,7 +866,8 @@ void cso_save_geometry_shader(struct cso_context *ctx) ctx->geometry_shader_saved = ctx->geometry_shader; } -void cso_restore_geometry_shader(struct cso_context *ctx) +static void +cso_restore_geometry_shader(struct cso_context *ctx) { if (!ctx->has_geometry_shader) { return; @@ -841,7 +900,8 @@ void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle) ctx->pipe->delete_tcs_state(ctx->pipe, handle); } -void cso_save_tessctrl_shader(struct 
cso_context *ctx) +static void +cso_save_tessctrl_shader(struct cso_context *ctx) { if (!ctx->has_tessellation) { return; @@ -851,7 +911,8 @@ void cso_save_tessctrl_shader(struct cso_context *ctx) ctx->tessctrl_shader_saved = ctx->tessctrl_shader; } -void cso_restore_tessctrl_shader(struct cso_context *ctx) +static void +cso_restore_tessctrl_shader(struct cso_context *ctx) { if (!ctx->has_tessellation) { return; @@ -884,7 +945,8 @@ void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle) ctx->pipe->delete_tes_state(ctx->pipe, handle); } -void cso_save_tesseval_shader(struct cso_context *ctx) +static void +cso_save_tesseval_shader(struct cso_context *ctx) { if (!ctx->has_tessellation) { return; @@ -894,7 +956,8 @@ void cso_save_tesseval_shader(struct cso_context *ctx) ctx->tesseval_shader_saved = ctx->tesseval_shader; } -void cso_restore_tesseval_shader(struct cso_context *ctx) +static void +cso_restore_tesseval_shader(struct cso_context *ctx) { if (!ctx->has_tessellation) { return; @@ -907,6 +970,26 @@ void cso_restore_tesseval_shader(struct cso_context *ctx) ctx->tesseval_shader_saved = NULL; } +void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle) +{ + assert(ctx->has_compute_shader || !handle); + + if (ctx->has_compute_shader && ctx->compute_shader != handle) { + ctx->compute_shader = handle; + ctx->pipe->bind_compute_state(ctx->pipe, handle); + } +} + +void cso_delete_compute_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->compute_shader) { + /* unbind before deleting */ + ctx->pipe->bind_compute_state(ctx->pipe, NULL); + ctx->compute_shader = NULL; + } + ctx->pipe->delete_compute_state(ctx->pipe, handle); +} + enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, unsigned count, @@ -967,7 +1050,8 @@ cso_set_vertex_elements(struct cso_context *ctx, return PIPE_OK; } -void cso_save_vertex_elements(struct cso_context *ctx) +static void +cso_save_vertex_elements(struct cso_context *ctx) { struct 
u_vbuf *vbuf = ctx->vbuf; @@ -980,7 +1064,8 @@ void cso_save_vertex_elements(struct cso_context *ctx) ctx->velements_saved = ctx->velements; } -void cso_restore_vertex_elements(struct cso_context *ctx) +static void +cso_restore_vertex_elements(struct cso_context *ctx) { struct u_vbuf *vbuf = ctx->vbuf; @@ -1032,7 +1117,8 @@ void cso_set_vertex_buffers(struct cso_context *ctx, ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers); } -void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx) +static void +cso_save_aux_vertex_buffer_slot(struct cso_context *ctx) { struct u_vbuf *vbuf = ctx->vbuf; @@ -1047,7 +1133,8 @@ void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx) sizeof(struct pipe_vertex_buffer)); } -void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx) +static void +cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx) { struct u_vbuf *vbuf = ctx->vbuf; @@ -1165,7 +1252,7 @@ cso_set_samplers(struct cso_context *ctx, return error; } -void +static void cso_save_fragment_samplers(struct cso_context *ctx) { struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT]; @@ -1176,7 +1263,7 @@ cso_save_fragment_samplers(struct cso_context *ctx) } -void +static void cso_restore_fragment_samplers(struct cso_context *ctx) { struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT]; @@ -1223,7 +1310,7 @@ cso_set_sampler_views(struct cso_context *ctx, } -void +static void cso_save_fragment_sampler_views(struct cso_context *ctx) { unsigned i; @@ -1238,7 +1325,7 @@ cso_save_fragment_sampler_views(struct cso_context *ctx) } -void +static void cso_restore_fragment_sampler_views(struct cso_context *ctx) { unsigned i, nr_saved = ctx->nr_fragment_views_saved; @@ -1298,7 +1385,7 @@ cso_set_stream_outputs(struct cso_context *ctx, ctx->nr_so_targets = num_targets; } -void +static void cso_save_stream_outputs(struct cso_context *ctx) { uint i; @@ -1315,7 +1402,7 @@ cso_save_stream_outputs(struct cso_context *ctx) } } -void 
+static void cso_restore_stream_outputs(struct cso_context *ctx) { struct pipe_context *pipe = ctx->pipe; @@ -1402,6 +1489,113 @@ cso_restore_constant_buffer_slot0(struct cso_context *cso, NULL); } + +/** + * Save all the CSO state items specified by the state_mask bitmask + * of CSO_BIT_x flags. + */ +void +cso_save_state(struct cso_context *cso, unsigned state_mask) +{ + assert(cso->saved_state == 0); + + cso->saved_state = state_mask; + + if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT) + cso_save_aux_vertex_buffer_slot(cso); + if (state_mask & CSO_BIT_BLEND) + cso_save_blend(cso); + if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA) + cso_save_depth_stencil_alpha(cso); + if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS) + cso_save_fragment_samplers(cso); + if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS) + cso_save_fragment_sampler_views(cso); + if (state_mask & CSO_BIT_FRAGMENT_SHADER) + cso_save_fragment_shader(cso); + if (state_mask & CSO_BIT_FRAMEBUFFER) + cso_save_framebuffer(cso); + if (state_mask & CSO_BIT_GEOMETRY_SHADER) + cso_save_geometry_shader(cso); + if (state_mask & CSO_BIT_MIN_SAMPLES) + cso_save_min_samples(cso); + if (state_mask & CSO_BIT_RASTERIZER) + cso_save_rasterizer(cso); + if (state_mask & CSO_BIT_RENDER_CONDITION) + cso_save_render_condition(cso); + if (state_mask & CSO_BIT_SAMPLE_MASK) + cso_save_sample_mask(cso); + if (state_mask & CSO_BIT_STENCIL_REF) + cso_save_stencil_ref(cso); + if (state_mask & CSO_BIT_STREAM_OUTPUTS) + cso_save_stream_outputs(cso); + if (state_mask & CSO_BIT_TESSCTRL_SHADER) + cso_save_tessctrl_shader(cso); + if (state_mask & CSO_BIT_TESSEVAL_SHADER) + cso_save_tesseval_shader(cso); + if (state_mask & CSO_BIT_VERTEX_ELEMENTS) + cso_save_vertex_elements(cso); + if (state_mask & CSO_BIT_VERTEX_SHADER) + cso_save_vertex_shader(cso); + if (state_mask & CSO_BIT_VIEWPORT) + cso_save_viewport(cso); +} + + +/** + * Restore the state which was saved by cso_save_state(). 
+ */ +void +cso_restore_state(struct cso_context *cso) +{ + unsigned state_mask = cso->saved_state; + + assert(state_mask); + + if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT) + cso_restore_aux_vertex_buffer_slot(cso); + if (state_mask & CSO_BIT_BLEND) + cso_restore_blend(cso); + if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA) + cso_restore_depth_stencil_alpha(cso); + if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS) + cso_restore_fragment_samplers(cso); + if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS) + cso_restore_fragment_sampler_views(cso); + if (state_mask & CSO_BIT_FRAGMENT_SHADER) + cso_restore_fragment_shader(cso); + if (state_mask & CSO_BIT_FRAMEBUFFER) + cso_restore_framebuffer(cso); + if (state_mask & CSO_BIT_GEOMETRY_SHADER) + cso_restore_geometry_shader(cso); + if (state_mask & CSO_BIT_MIN_SAMPLES) + cso_restore_min_samples(cso); + if (state_mask & CSO_BIT_RASTERIZER) + cso_restore_rasterizer(cso); + if (state_mask & CSO_BIT_RENDER_CONDITION) + cso_restore_render_condition(cso); + if (state_mask & CSO_BIT_SAMPLE_MASK) + cso_restore_sample_mask(cso); + if (state_mask & CSO_BIT_STENCIL_REF) + cso_restore_stencil_ref(cso); + if (state_mask & CSO_BIT_STREAM_OUTPUTS) + cso_restore_stream_outputs(cso); + if (state_mask & CSO_BIT_TESSCTRL_SHADER) + cso_restore_tessctrl_shader(cso); + if (state_mask & CSO_BIT_TESSEVAL_SHADER) + cso_restore_tesseval_shader(cso); + if (state_mask & CSO_BIT_VERTEX_ELEMENTS) + cso_restore_vertex_elements(cso); + if (state_mask & CSO_BIT_VERTEX_SHADER) + cso_restore_vertex_shader(cso); + if (state_mask & CSO_BIT_VIEWPORT) + cso_restore_viewport(cso); + + cso->saved_state = 0; +} + + + /* drawing */ void diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index f0a27390d17..a3563d83a02 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -47,22 +47,15 @@ void cso_destroy_context( struct cso_context *cso ); enum pipe_error 
cso_set_blend( struct cso_context *cso, const struct pipe_blend_state *blend ); -void cso_save_blend(struct cso_context *cso); -void cso_restore_blend(struct cso_context *cso); - enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso, const struct pipe_depth_stencil_alpha_state *dsa ); -void cso_save_depth_stencil_alpha(struct cso_context *cso); -void cso_restore_depth_stencil_alpha(struct cso_context *cso); enum pipe_error cso_set_rasterizer( struct cso_context *cso, const struct pipe_rasterizer_state *rasterizer ); -void cso_save_rasterizer(struct cso_context *cso); -void cso_restore_rasterizer(struct cso_context *cso); enum pipe_error @@ -71,11 +64,6 @@ cso_set_samplers(struct cso_context *cso, unsigned count, const struct pipe_sampler_state **states); -void -cso_save_fragment_samplers(struct cso_context *cso); - -void -cso_restore_fragment_samplers(struct cso_context *cso); /* Alternate interface to support state trackers that like to modify * samplers one at a time: @@ -91,9 +79,6 @@ cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage); enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, unsigned count, const struct pipe_vertex_element *states); -void cso_save_vertex_elements(struct cso_context *ctx); -void cso_restore_vertex_elements(struct cso_context *ctx); - void cso_set_vertex_buffers(struct cso_context *ctx, unsigned start_slot, unsigned count, @@ -101,8 +86,6 @@ void cso_set_vertex_buffers(struct cso_context *ctx, /* One vertex buffer slot is provided with the save/restore functionality. * cso_context chooses the slot, it can be non-zero. 
*/ -void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx); -void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx); unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx); @@ -110,8 +93,6 @@ void cso_set_stream_outputs(struct cso_context *ctx, unsigned num_targets, struct pipe_stream_output_target **targets, const unsigned *offsets); -void cso_save_stream_outputs(struct cso_context *ctx); -void cso_restore_stream_outputs(struct cso_context *ctx); /* @@ -123,67 +104,81 @@ void cso_restore_stream_outputs(struct cso_context *ctx); void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle); void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ); -void cso_save_fragment_shader(struct cso_context *cso); -void cso_restore_fragment_shader(struct cso_context *cso); void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle); void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ); -void cso_save_vertex_shader(struct cso_context *cso); -void cso_restore_vertex_shader(struct cso_context *cso); void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle); void cso_delete_geometry_shader(struct cso_context *ctx, void *handle); -void cso_save_geometry_shader(struct cso_context *cso); -void cso_restore_geometry_shader(struct cso_context *cso); void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle); void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle); -void cso_save_tessctrl_shader(struct cso_context *cso); -void cso_restore_tessctrl_shader(struct cso_context *cso); void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle); void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle); -void cso_save_tesseval_shader(struct cso_context *cso); -void cso_restore_tesseval_shader(struct cso_context *cso); + + +void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle); +void cso_delete_compute_shader(struct 
cso_context *ctx, void *handle); void cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); -void cso_save_framebuffer(struct cso_context *cso); -void cso_restore_framebuffer(struct cso_context *cso); void cso_set_viewport(struct cso_context *cso, const struct pipe_viewport_state *vp); -void cso_save_viewport(struct cso_context *cso); -void cso_restore_viewport(struct cso_context *cso); +void cso_set_viewport_dims(struct cso_context *ctx, + float width, float height, boolean invert); void cso_set_blend_color(struct cso_context *cso, const struct pipe_blend_color *bc); void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask); -void cso_save_sample_mask(struct cso_context *ctx); -void cso_restore_sample_mask(struct cso_context *ctx); void cso_set_min_samples(struct cso_context *cso, unsigned min_samples); -void cso_save_min_samples(struct cso_context *ctx); -void cso_restore_min_samples(struct cso_context *ctx); void cso_set_stencil_ref(struct cso_context *cso, const struct pipe_stencil_ref *sr); -void cso_save_stencil_ref(struct cso_context *cso); -void cso_restore_stencil_ref(struct cso_context *cso); void cso_set_render_condition(struct cso_context *cso, struct pipe_query *query, boolean condition, uint mode); -void cso_save_render_condition(struct cso_context *cso); -void cso_restore_render_condition(struct cso_context *cso); + + +#define CSO_BIT_AUX_VERTEX_BUFFER_SLOT 0x1 +#define CSO_BIT_BLEND 0x2 +#define CSO_BIT_DEPTH_STENCIL_ALPHA 0x4 +#define CSO_BIT_FRAGMENT_SAMPLERS 0x8 +#define CSO_BIT_FRAGMENT_SAMPLER_VIEWS 0x10 +#define CSO_BIT_FRAGMENT_SHADER 0x20 +#define CSO_BIT_FRAMEBUFFER 0x40 +#define CSO_BIT_GEOMETRY_SHADER 0x80 +#define CSO_BIT_MIN_SAMPLES 0x100 +#define CSO_BIT_RASTERIZER 0x200 +#define CSO_BIT_RENDER_CONDITION 0x400 +#define CSO_BIT_SAMPLE_MASK 0x800 +#define CSO_BIT_STENCIL_REF 0x1000 +#define CSO_BIT_STREAM_OUTPUTS 0x2000 +#define CSO_BIT_TESSCTRL_SHADER 0x4000 +#define 
CSO_BIT_TESSEVAL_SHADER 0x8000 +#define CSO_BIT_VERTEX_ELEMENTS 0x10000 +#define CSO_BIT_VERTEX_SHADER 0x20000 +#define CSO_BIT_VIEWPORT 0x40000 + +#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \ + CSO_BIT_FRAGMENT_SHADER | \ + CSO_BIT_GEOMETRY_SHADER | \ + CSO_BIT_TESSCTRL_SHADER | \ + CSO_BIT_TESSEVAL_SHADER) + +void cso_save_state(struct cso_context *cso, unsigned state_mask); +void cso_restore_state(struct cso_context *cso); /* sampler view state */ @@ -194,12 +189,6 @@ cso_set_sampler_views(struct cso_context *cso, unsigned count, struct pipe_sampler_view **views); -void -cso_save_fragment_sampler_views(struct cso_context *ctx); - -void -cso_restore_fragment_sampler_views(struct cso_context *ctx); - /* constant buffers */ @@ -230,7 +219,6 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode, uint start, uint count, uint start_instance, uint instance_count); -/* helper drawing function */ void cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 7283e2f162f..efaf2fa306a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -26,6 +26,9 @@ **************************************************************************/ #include <stddef.h> +#include <fstream> +#include <sstream> +#include <iomanip> #include <llvm-c/Core.h> #include <llvm-c/Disassembler.h> @@ -125,7 +128,7 @@ lp_debug_dump_value(LLVMValueRef value) * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html */ static size_t -disassemble(const void* func) +disassemble(const void* func, std::stringstream &buffer) { const uint8_t *bytes = (const uint8_t *)func; @@ -143,8 +146,8 @@ disassemble(const void* func) char outline[1024]; if (!D) { - _debug_printf("error: couldn't create disassembler for triple %s\n", - Triple.c_str()); + buffer << "error: could not create disassembler for triple 
" + << Triple.c_str() << '\n'; return 0; } @@ -158,13 +161,13 @@ disassemble(const void* func) * so that between runs. */ - _debug_printf("%6lu:\t", (unsigned long)pc); + buffer << std::setw(6) << (unsigned long)pc << ":\t"; Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline, sizeof outline); if (!Size) { - _debug_printf("invalid\n"); + buffer << "invalid\n"; pc += 1; break; } @@ -176,10 +179,11 @@ disassemble(const void* func) if (0) { unsigned i; for (i = 0; i < Size; ++i) { - _debug_printf("%02x ", bytes[pc + i]); + buffer << std::hex << std::setfill('0') << std::setw(2) + << static_cast<int> (bytes[pc + i]); } for (; i < 16; ++i) { - _debug_printf(" "); + buffer << std::dec << " "; } } @@ -187,9 +191,7 @@ disassemble(const void* func) * Print the instruction. */ - _debug_printf("%*s", Size, outline); - - _debug_printf("\n"); + buffer << std::setw(Size) << outline << '\n'; /* * Stop disassembling on return statements, if there is no record of a @@ -198,9 +200,11 @@ disassemble(const void* func) * XXX: This currently assumes x86 */ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if (Size == 1 && bytes[pc] == 0xc3) { break; } +#endif /* * Advance. @@ -209,12 +213,12 @@ disassemble(const void* func) pc += Size; if (pc >= extent) { - _debug_printf("disassembly larger than %ull bytes, aborting\n", extent); + buffer << "disassembly larger than " << extent << " bytes, aborting\n"; break; } } - _debug_printf("\n"); + buffer << '\n'; LLVMDisasmDispose(D); @@ -222,7 +226,8 @@ disassemble(const void* func) * Print GDB command, useful to verify output. 
*/ if (0) { - _debug_printf("disassemble %p %p\n", bytes, bytes + pc); + buffer << "disassemble " << static_cast<const void*>(bytes) << ' ' + << static_cast<const void*>(bytes + pc) << '\n'; } return pc; @@ -231,8 +236,14 @@ disassemble(const void* func) extern "C" void lp_disassemble(LLVMValueRef func, const void *code) { - _debug_printf("%s:\n", LLVMGetValueName(func)); - disassemble(code); + std::stringstream buffer; + std::string s; + + buffer << LLVMGetValueName(func) << ":\n"; + disassemble(code, buffer); + s = buffer.str(); + _debug_printf("%s", s.c_str()); + _debug_printf("\n"); } @@ -248,9 +259,10 @@ extern "C" void lp_profile(LLVMValueRef func, const void *code) { #if defined(__linux__) && defined(PROFILE) + std::stringstream buffer; + static std::ofstream perf_asm_file; static boolean first_time = TRUE; static FILE *perf_map_file = NULL; - static int perf_asm_fd = -1; if (first_time) { /* * We rely on the disassembler for determining a function's size, but @@ -264,17 +276,16 @@ lp_profile(LLVMValueRef func, const void *code) util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid); perf_map_file = fopen(filename, "wt"); util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid); - mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; - perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode); + perf_asm_file.open(filename); } first_time = FALSE; } if (perf_map_file) { const char *symbol = LLVMGetValueName(func); unsigned long addr = (uintptr_t)code; - llvm::raw_fd_ostream Out(perf_asm_fd, false); - Out << symbol << ":\n"; - unsigned long size = disassemble(code); + buffer << symbol << ":\n"; + unsigned long size = disassemble(code, buffer); + perf_asm_file << buffer.rdbuf() << std::flush; fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol); fflush(perf_map_file); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 
96aba7370c1..ab55be4c439 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -118,8 +118,10 @@ create_pass_manager(struct gallivm_state *gallivm) * simple, or constant propagation into them, etc. */ +#if HAVE_LLVM < 0x0309 // Old versions of LLVM get the DataLayout from the pass manager. LLVMAddTargetData(gallivm->target, gallivm->passmgr); +#endif /* Setting the module's DataLayout to an empty string will cause the * ExecutionEngine to copy to the DataLayout string from its target diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 4598db851ae..32addec9724 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -128,6 +128,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_SHADER_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; @@ -137,6 +139,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 3ee708f4fad..30ef37c9d22 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -61,6 +61,11 @@ #include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ADT/Triple.h> +#if HAVE_LLVM >= 0x0307 +#include <llvm/Analysis/TargetLibraryInfo.h> +#else +#include <llvm/Target/TargetLibraryInfo.h> +#endif #if HAVE_LLVM < 0x0306 
#include <llvm/ExecutionEngine/JITMemoryManager.h> #else @@ -147,6 +152,31 @@ lp_set_target_options(void) gallivm_init_llvm_targets(); } +extern "C" +LLVMTargetLibraryInfoRef +gallivm_create_target_library_info(const char *triple) +{ + return reinterpret_cast<LLVMTargetLibraryInfoRef>( +#if HAVE_LLVM < 0x0307 + new llvm::TargetLibraryInfo( +#else + new llvm::TargetLibraryInfoImpl( +#endif + llvm::Triple(triple))); +} + +extern "C" +void +gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) +{ + delete reinterpret_cast< +#if HAVE_LLVM < 0x0307 + llvm::TargetLibraryInfo +#else + llvm::TargetLibraryInfoImpl +#endif + *>(library_info); +} extern "C" LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h index 86d2f86ac45..30b7b1674af 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h @@ -32,6 +32,7 @@ #include "lp_bld.h" #include <llvm-c/ExecutionEngine.h> +#include <llvm-c/Target.h> #ifdef __cplusplus @@ -44,6 +45,12 @@ struct lp_generated_code; extern void gallivm_init_llvm_targets(void); +extern LLVMTargetLibraryInfoRef +gallivm_create_target_library_info(const char *triple); + +extern void +gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info); + extern void lp_set_target_options(void); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 8c39ab0afe9..a19be8a503a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2592,7 +2592,10 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); } - /* XXX: for real msaa support, the w component would be the sample index. 
*/ + /* + * XXX: for real msaa support, the w component (or src2.x for sample_i_ms) + * would be the sample index. + */ for (i = 0; i < dims; i++) { coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); @@ -2742,6 +2745,7 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld, opcode == TGSI_OPCODE_SAMPLE_C_LZ || opcode == TGSI_OPCODE_SAMPLE_D || opcode == TGSI_OPCODE_SAMPLE_I || + opcode == TGSI_OPCODE_SAMPLE_I_MS || opcode == TGSI_OPCODE_SAMPLE_L || opcode == TGSI_OPCODE_SVIEWINFO || opcode == TGSI_OPCODE_CAL || @@ -3989,6 +3993,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit; bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit; bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit; + bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit; bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; diff --git a/src/gallium/auxiliary/hud/font.c b/src/gallium/auxiliary/hud/font.c index 60e8ae514a5..067de9e39c7 100644 --- a/src/gallium/auxiliary/hud/font.c +++ b/src/gallium/auxiliary/hud/font.c @@ -199,6 +199,7 @@ static const GLubyte Fixed8x13_Character_123[] = { 8, 0, 0, 0, 14, 16, 16, static const GLubyte Fixed8x13_Character_124[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0}; static const GLubyte Fixed8x13_Character_125[] = { 8, 0, 0, 0,112, 8, 8, 16, 12, 16, 8, 8,112, 0, 0}; static const GLubyte Fixed8x13_Character_126[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 84, 36, 0, 0}; +#if 0 /* currently unused */ static const GLubyte Fixed8x13_Character_127[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; static const GLubyte Fixed8x13_Character_128[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; static const GLubyte Fixed8x13_Character_129[] = { 9, 0, 
0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; @@ -232,6 +233,7 @@ static const GLubyte Fixed8x13_Character_156[] = { 9, 0, 0, 0, 0, 0, 0,17 static const GLubyte Fixed8x13_Character_157[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; static const GLubyte Fixed8x13_Character_158[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; static const GLubyte Fixed8x13_Character_159[] = { 9, 0, 0, 0, 0, 0, 0,170, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,130, 0, 0, 0,170, 0, 0, 0, 0, 0}; +#endif static const GLubyte Fixed8x13_Character_160[] = { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static const GLubyte Fixed8x13_Character_161[] = { 8, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 0, 16, 0, 0}; static const GLubyte Fixed8x13_Character_162[] = { 8, 0, 0, 0, 0, 16, 56, 84, 80, 80, 84, 56, 16, 0, 0}; diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index 75afebe4919..fb998349a35 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -460,25 +460,25 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) hud->constants.two_div_fb_width = 2.0f / hud->fb_width; hud->constants.two_div_fb_height = 2.0f / hud->fb_height; - cso_save_framebuffer(cso); - cso_save_sample_mask(cso); - cso_save_min_samples(cso); - cso_save_blend(cso); - cso_save_depth_stencil_alpha(cso); - cso_save_fragment_shader(cso); - cso_save_fragment_sampler_views(cso); - cso_save_fragment_samplers(cso); - cso_save_rasterizer(cso); - cso_save_viewport(cso); - cso_save_stream_outputs(cso); - cso_save_geometry_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_vertex_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_FRAMEBUFFER | + CSO_BIT_SAMPLE_MASK | + CSO_BIT_MIN_SAMPLES | + 
CSO_BIT_BLEND | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_FRAGMENT_SHADER | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_RASTERIZER | + CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_GEOMETRY_SHADER | + CSO_BIT_TESSCTRL_SHADER | + CSO_BIT_TESSEVAL_SHADER | + CSO_BIT_VERTEX_SHADER | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BIT_RENDER_CONDITION)); cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX); - cso_save_render_condition(cso); /* set states */ memset(&surf_templ, 0, sizeof(surf_templ)); @@ -591,26 +591,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) hud_pane_draw_colored_objects(hud, pane); } - /* restore states */ - cso_restore_framebuffer(cso); - cso_restore_sample_mask(cso); - cso_restore_min_samples(cso); - cso_restore_blend(cso); - cso_restore_depth_stencil_alpha(cso); - cso_restore_fragment_shader(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_fragment_samplers(cso); - cso_restore_rasterizer(cso); - cso_restore_viewport(cso); - cso_restore_stream_outputs(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); + cso_restore_state(cso); cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX); - cso_restore_render_condition(cso); pipe_surface_reference(&surf, NULL); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 14de61b163f..023a028a1d0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -283,8 +283,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, return SUPER(mm); failure: -if(mm->heap) - u_mmDestroy(mm->heap); + if(mm->heap) + u_mmDestroy(mm->heap); if(mm->map) pb_unmap(mm->buffer); FREE(mm); diff --git 
a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c index c6c7b88eea3..9dc8fb51ae2 100644 --- a/src/gallium/auxiliary/postprocess/pp_run.c +++ b/src/gallium/auxiliary/postprocess/pp_run.c @@ -115,27 +115,27 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, } /* save state (restored below) */ - cso_save_blend(cso); - cso_save_depth_stencil_alpha(cso); - cso_save_fragment_shader(cso); - cso_save_framebuffer(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_rasterizer(cso); - cso_save_sample_mask(cso); - cso_save_min_samples(cso); - cso_save_fragment_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_stencil_ref(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_elements(cso); - cso_save_vertex_shader(cso); - cso_save_viewport(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_BLEND | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_FRAGMENT_SHADER | + CSO_BIT_FRAMEBUFFER | + CSO_BIT_TESSCTRL_SHADER | + CSO_BIT_TESSEVAL_SHADER | + CSO_BIT_GEOMETRY_SHADER | + CSO_BIT_RASTERIZER | + CSO_BIT_SAMPLE_MASK | + CSO_BIT_MIN_SAMPLES | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_STENCIL_REF | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_VERTEX_SHADER | + CSO_BIT_VIEWPORT | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BIT_RENDER_CONDITION)); cso_save_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX); cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); - cso_save_render_condition(cso); /* set default state */ cso_set_sample_mask(cso, ~0); @@ -186,27 +186,9 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in, } /* restore state we changed */ - cso_restore_blend(cso); - cso_restore_depth_stencil_alpha(cso); - cso_restore_fragment_shader(cso); - cso_restore_framebuffer(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - 
cso_restore_geometry_shader(cso); - cso_restore_rasterizer(cso); - cso_restore_sample_mask(cso); - cso_restore_min_samples(cso); - cso_restore_fragment_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_stencil_ref(cso); - cso_restore_stream_outputs(cso); - cso_restore_vertex_elements(cso); - cso_restore_vertex_shader(cso); - cso_restore_viewport(cso); - cso_restore_aux_vertex_buffer_slot(cso); + cso_restore_state(cso); cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_VERTEX); cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); - cso_restore_render_condition(cso); pipe_resource_reference(&ppq->depth, NULL); pipe_resource_reference(&refin, NULL); diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 83f50628b40..cfe9b92ee1b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -111,6 +111,7 @@ tgsi_default_declaration( void ) declaration.Local = 0; declaration.Array = 0; declaration.Atomic = 0; + declaration.Shared = 0; declaration.Padding = 0; return declaration; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 2ad29b9d49a..f232f3870d1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -352,7 +352,7 @@ iter_declaration( TXT(", "); ENM(decl->Image.Resource, tgsi_texture_names); TXT(", "); - UID(decl->Image.Format); + TXT(util_format_name(decl->Image.Format)); if (decl->Image.Writable) TXT(", WR"); if (decl->Image.Raw) @@ -364,6 +364,11 @@ iter_declaration( TXT(", ATOMIC"); } + if (decl->Declaration.File == TGSI_FILE_MEMORY) { + if (decl->Declaration.Shared) + TXT(", SHARED"); + } + if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { TXT(", "); ENM(decl->SamplerView.Resource, tgsi_texture_names); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d898fd66f48..126259fc0f8 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2300,7 +2300,8 @@ exec_txf(struct tgsi_exec_machine *mach, IFETCH(&r[3], 0, TGSI_CHAN_W); - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { target = mach->SamplerViews[unit].Resource; } else { @@ -2342,7 +2343,8 @@ exec_txf(struct tgsi_exec_machine *mach, r[3].f[j] = rgba[3][j]; } - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { unsigned char swizzles[4]; swizzles[0] = inst->Src[1].Register.SwizzleX; swizzles[1] = inst->Src[1].Register.SwizzleY; @@ -4967,7 +4969,7 @@ exec_instruction( break; case TGSI_OPCODE_SAMPLE_I_MS: - assert(0); + exec_txf(mach, inst); break; case TGSI_OPCODE_SAMPLE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 26fec8e2142..12a68759ce5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -465,6 +465,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_MAX_SHADER_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; case PIPE_SHADER_CAP_DOUBLES: @@ -474,6 +476,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 489423d7f12..4f85d2fda67 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -247,7 +247,14 @@ scan_declaration(struct tgsi_shader_info *info, info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate; info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location; info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap; - info->num_inputs++; + + /* Vertex shaders can have inputs with holes between them. */ + if (info->processor == TGSI_PROCESSOR_VERTEX) + info->num_inputs = MAX2(info->num_inputs, reg + 1); + else { + info->num_inputs++; + assert(reg < info->num_inputs); + } /* Only interpolated varyings. Don't include POSITION. * Don't include integer varyings, because they are not @@ -341,6 +348,7 @@ scan_declaration(struct tgsi_shader_info *info, info->output_semantic_name[reg] = (ubyte) semName; info->output_semantic_index[reg] = (ubyte) semIndex; info->num_outputs++; + assert(reg < info->num_outputs); if (semName == TGSI_SEMANTIC_COLOR) info->colors_written |= 1 << semIndex; diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index f2d70d49839..b15ae69cf7a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -57,6 +57,7 @@ static const char *tgsi_file_names[] = "IMAGE", "SVIEW", "BUFFER", + "MEMORY", }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 97b1869a66f..91baa01ad8b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -1290,8 +1290,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) return FALSE; } - /* XXX format */ - cur2 = cur; eat_opt_white(&cur2); while (*cur2 == ',') { @@ -1304,7 +1302,16 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.Image.Writable = 1; } else { - break; + for (i = 0; i < PIPE_FORMAT_COUNT; i++) { + const struct util_format_description *desc 
= + util_format_description(i); + if (desc && str_match_nocase_whole(&cur2, desc->name)) { + decl.Image.Format = i; + break; + } + } + if (i == PIPE_FORMAT_COUNT) + break; } cur = cur2; eat_opt_white(&cur2); @@ -1381,6 +1388,9 @@ static boolean parse_declaration( struct translate_ctx *ctx ) if (str_match_nocase_whole(&cur, "ATOMIC")) { decl.Declaration.Atomic = 1; ctx->cur = cur; + } else if (str_match_nocase_whole(&cur, "SHARED")) { + decl.Declaration.Shared = 1; + ctx->cur = cur; } } else { if (str_match_nocase_whole(&cur, "LOCAL")) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 9654ac52bf2..e1a72786476 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -189,6 +189,8 @@ struct ureg_program unsigned nr_instructions; struct ureg_tokens domain[2]; + + bool use_shared_memory; }; static union tgsi_any_token error_tokens[32]; @@ -727,6 +729,16 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, return reg; } +/* Allocate a shared memory area. 
+ */ +struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0); + + ureg->use_shared_memory = true; + return reg; +} + static int match_or_expand_immediate64( const unsigned *v, int type, @@ -1654,6 +1666,23 @@ emit_decl_buffer(struct ureg_program *ureg, } static void +emit_decl_shared_memory(struct ureg_program *ureg) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 2; + out[0].decl.File = TGSI_FILE_MEMORY; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Shared = true; + + out[1].value = 0; + out[1].decl_range.First = 0; + out[1].decl_range.Last = 0; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1825,6 +1854,9 @@ static void emit_decls( struct ureg_program *ureg ) emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic); } + if (ureg->use_shared_memory) + emit_decl_shared_memory(ureg); + if (ureg->const_decls.nr_constant_ranges) { for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { emit_decl_range(ureg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 86e58a91343..6a3b5ddf017 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -337,6 +337,9 @@ ureg_DECL_image(struct ureg_program *ureg, struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic); +struct ureg_src +ureg_DECL_shared_memory(struct ureg_program *ureg); + static inline struct ureg_src ureg_imm4f( struct ureg_program *ureg, float a, float b, diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9737c940936..22c40d1382d 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -541,23 +541,23 @@ util_blit_pixels_tex(struct blit_state 
*ctx, PIPE_BIND_RENDER_TARGET)); /* save state (restored below) */ - cso_save_blend(ctx->cso); - cso_save_depth_stencil_alpha(ctx->cso); - cso_save_rasterizer(ctx->cso); - cso_save_sample_mask(ctx->cso); - cso_save_min_samples(ctx->cso); - cso_save_fragment_samplers(ctx->cso); - cso_save_fragment_sampler_views(ctx->cso); - cso_save_stream_outputs(ctx->cso); - cso_save_viewport(ctx->cso); - cso_save_framebuffer(ctx->cso); - cso_save_fragment_shader(ctx->cso); - cso_save_vertex_shader(ctx->cso); - cso_save_tessctrl_shader(ctx->cso); - cso_save_tesseval_shader(ctx->cso); - cso_save_geometry_shader(ctx->cso); - cso_save_vertex_elements(ctx->cso); - cso_save_aux_vertex_buffer_slot(ctx->cso); + cso_save_state(ctx->cso, (CSO_BIT_BLEND | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_RASTERIZER | + CSO_BIT_SAMPLE_MASK | + CSO_BIT_MIN_SAMPLES | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VIEWPORT | + CSO_BIT_FRAMEBUFFER | + CSO_BIT_FRAGMENT_SHADER | + CSO_BIT_VERTEX_SHADER | + CSO_BIT_TESSCTRL_SHADER | + CSO_BIT_TESSEVAL_SHADER | + CSO_BIT_GEOMETRY_SHADER | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT)); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend_write_color); @@ -625,21 +625,5 @@ util_blit_pixels_tex(struct blit_state *ctx, 2); /* attribs/vert */ /* restore state we changed */ - cso_restore_blend(ctx->cso); - cso_restore_depth_stencil_alpha(ctx->cso); - cso_restore_rasterizer(ctx->cso); - cso_restore_sample_mask(ctx->cso); - cso_restore_min_samples(ctx->cso); - cso_restore_fragment_samplers(ctx->cso); - cso_restore_fragment_sampler_views(ctx->cso); - cso_restore_viewport(ctx->cso); - cso_restore_framebuffer(ctx->cso); - cso_restore_fragment_shader(ctx->cso); - cso_restore_vertex_shader(ctx->cso); - cso_restore_tessctrl_shader(ctx->cso); - cso_restore_tesseval_shader(ctx->cso); - cso_restore_geometry_shader(ctx->cso); - cso_restore_vertex_elements(ctx->cso); - 
cso_restore_aux_vertex_buffer_slot(ctx->cso); - cso_restore_stream_outputs(ctx->cso); + cso_restore_state(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 57a3b0b6082..d0812039292 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -174,17 +174,6 @@ pipe_sampler_view_release(struct pipe_context *ctx, } static inline void -pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view) -{ - struct pipe_image_view *old_view = *ptr; - - if (pipe_reference_described(&(*ptr)->reference, &view->reference, - (debug_reference_descriptor)debug_describe_image_view)) - old_view->context->image_view_destroy(old_view->context, old_view); - *ptr = view; -} - -static inline void pipe_so_target_reference(struct pipe_stream_output_target **ptr, struct pipe_stream_output_target *target) { diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 3324bcca6f4..b5d691f4f7e 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -415,6 +415,9 @@ to be 0. (also used to implement atomic counters). Having this be non-0 also implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI opcodes. +* ``PIPE_SHADER_CAP_SUPPORTED_IRS``: Supported representations of the + program. It should be a mask of ``pipe_shader_ir`` bits. +* ``PIPE_SHADER_CAP_MAX_SHADER_IMAGES``: Maximum number of image units. .. 
_pipe_compute_cap: diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c index 3ae7764ff3f..9dfaa0af289 100644 --- a/src/gallium/drivers/ddebug/dd_context.c +++ b/src/gallium/drivers/ddebug/dd_context.c @@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe, pipe->sampler_view_destroy(pipe, view); } -static struct pipe_image_view * -dd_context_create_image_view(struct pipe_context *_pipe, - struct pipe_resource *resource, - const struct pipe_image_view *templ) -{ - struct pipe_context *pipe = dd_context(_pipe)->pipe; - struct pipe_image_view *view = - pipe->create_image_view(pipe, resource, templ); - - if (!view) - return NULL; - view->context = _pipe; - return view; -} - -static void -dd_context_image_view_destroy(struct pipe_context *_pipe, - struct pipe_image_view *view) -{ - struct pipe_context *pipe = dd_context(_pipe)->pipe; - - pipe->image_view_destroy(pipe, view); -} - static struct pipe_stream_output_target * dd_context_create_stream_output_target(struct pipe_context *_pipe, struct pipe_resource *res, @@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader, static void dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader, unsigned start, unsigned num, - struct pipe_image_view **views) + struct pipe_image_view *views) { struct dd_context *dctx = dd_context(_pipe); struct pipe_context *pipe = dctx->pipe; @@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe) CTX_INIT(sampler_view_destroy); CTX_INIT(create_surface); CTX_INIT(surface_destroy); - CTX_INIT(create_image_view); - CTX_INIT(image_view_destroy); CTX_INIT(transfer_map); CTX_INIT(transfer_flush_region); CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h index 80098dcb644..c9bbd569abe 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ 
-94,7 +94,7 @@ struct dd_context struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; + struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; struct dd_state *velems; diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index d23111352b7..71ee55054d3 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, 
from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index c4f253b836c..c6286a1f290 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -255,11 +256,273 @@ enum a3xx_color_fmt { RB_R32G32B32A32_UINT = 59, }; +enum a3xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_AHB_PFPTRANS_WAIT = 3, + CP_AHB_NRTTRANS_WAIT = 6, + CP_CSF_NRT_READ_WAIT = 8, + CP_CSF_I1_FIFO_FULL = 9, + CP_CSF_I2_FIFO_FULL = 10, + CP_CSF_ST_FIFO_FULL = 11, + CP_RESERVED_12 = 12, + CP_CSF_RING_ROQ_FULL = 13, + CP_CSF_I1_ROQ_FULL = 14, + CP_CSF_I2_ROQ_FULL = 15, + CP_CSF_ST_ROQ_FULL = 16, + CP_RESERVED_17 = 17, + CP_MIU_TAG_MEM_FULL = 18, + CP_MIU_NRT_WRITE_STALLED = 22, + CP_MIU_NRT_READ_STALLED = 23, + CP_ME_REGS_RB_DONE_FIFO_FULL = 26, + CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, + CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, + CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, + CP_ME_MICRO_RB_STARVED = 30, + CP_AHB_RBBM_DWORD_SENT = 40, + CP_ME_BUSY_CLOCKS = 41, + CP_ME_WAIT_CONTEXT_AVAIL = 42, + CP_PFP_TYPE0_PACKET = 43, + CP_PFP_TYPE3_PACKET = 44, + CP_CSF_RB_WPTR_NEQ_RPTR = 45, + CP_CSF_I1_SIZE_NEQ_ZERO = 46, + CP_CSF_I2_SIZE_NEQ_ZERO = 47, + CP_CSF_RBI1I2_FETCHING = 48, +}; + +enum a3xx_gras_tse_perfcounter_select { + GRAS_TSEPERF_INPUT_PRIM = 0, + GRAS_TSEPERF_INPUT_NULL_PRIM = 1, + GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, + GRAS_TSEPERF_CLIPPED_PRIM = 3, + GRAS_TSEPERF_NEW_PRIM = 4, + GRAS_TSEPERF_ZERO_AREA_PRIM = 5, + 
GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, + GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, + GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, + GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, + GRAS_TSEPERF_PRE_CLIP_PRIM = 10, + GRAS_TSEPERF_POST_CLIP_PRIM = 11, + GRAS_TSEPERF_WORKING_CYCLES = 12, + GRAS_TSEPERF_PC_STARVE = 13, + GRAS_TSERASPERF_STALL = 14, +}; + +enum a3xx_gras_ras_perfcounter_select { + GRAS_RASPERF_16X16_TILES = 0, + GRAS_RASPERF_8X8_TILES = 1, + GRAS_RASPERF_4X4_TILES = 2, + GRAS_RASPERF_WORKING_CYCLES = 3, + GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, + GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, + GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, +}; + +enum a3xx_hlsq_perfcounter_select { + HLSQ_PERF_SP_VS_CONSTANT = 0, + HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, + HLSQ_PERF_SP_FS_CONSTANT = 2, + HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, + HLSQ_PERF_TP_STATE = 4, + HLSQ_PERF_QUADS = 5, + HLSQ_PERF_PIXELS = 6, + HLSQ_PERF_VERTICES = 7, + HLSQ_PERF_FS8_THREADS = 8, + HLSQ_PERF_FS16_THREADS = 9, + HLSQ_PERF_FS32_THREADS = 10, + HLSQ_PERF_VS8_THREADS = 11, + HLSQ_PERF_VS16_THREADS = 12, + HLSQ_PERF_SP_VS_DATA_BYTES = 13, + HLSQ_PERF_SP_FS_DATA_BYTES = 14, + HLSQ_PERF_ACTIVE_CYCLES = 15, + HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, + HLSQ_PERF_STALL_CYCLES_SP_VS = 17, + HLSQ_PERF_STALL_CYCLES_SP_FS = 18, + HLSQ_PERF_STALL_CYCLES_UCHE = 19, + HLSQ_PERF_RBBM_LOAD_CYCLES = 20, + HLSQ_PERF_DI_TO_VS_START_SP0 = 21, + HLSQ_PERF_DI_TO_FS_START_SP0 = 22, + HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, + HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, + HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, + HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, + HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, + HLSQ_PERF_UCHE_LATENCY_COUNT = 28, +}; + +enum a3xx_pc_perfcounter_select { + PC_PCPERF_VISIBILITY_STREAMS = 0, + PC_PCPERF_TOTAL_INSTANCES = 1, + PC_PCPERF_PRIMITIVES_PC_VPC = 2, + PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, + PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, + PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, + PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, + PC_PCPERF_VERTICES_TO_VFD = 7, + 
PC_PCPERF_REUSED_VERTICES = 8, + PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, + PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, + PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, + PC_PCPERF_CYCLES_IS_WORKING = 12, +}; + +enum a3xx_rb_perfcounter_select { + RB_RBPERF_ACTIVE_CYCLES_ANY = 0, + RB_RBPERF_ACTIVE_CYCLES_ALL = 1, + RB_RBPERF_STARVE_CYCLES_BY_SP = 2, + RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, + RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, + RB_RBPERF_STALL_CYCLES_BY_MARB = 5, + RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, + RB_RBPERF_RB_MARB_DATA = 7, + RB_RBPERF_SP_RB_QUAD = 8, + RB_RBPERF_RAS_EARLY_Z_QUADS = 9, + RB_RBPERF_GMEM_CH0_READ = 10, + RB_RBPERF_GMEM_CH1_READ = 11, + RB_RBPERF_GMEM_CH0_WRITE = 12, + RB_RBPERF_GMEM_CH1_WRITE = 13, + RB_RBPERF_CP_CONTEXT_DONE = 14, + RB_RBPERF_CP_CACHE_FLUSH = 15, + RB_RBPERF_CP_ZPASS_DONE = 16, +}; + +enum a3xx_rbbm_perfcounter_select { + RBBM_ALAWYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TEX_BUSY = 12, + RBBM_ANY_USP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_RBBM_STATUS_MASKED = 22, +}; + enum a3xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_UCHE_LOAD_INSTRUCTIONS = 3, + SP_UCHE_STORE_INSTRUCTIONS = 4, + SP_UCHE_ATOMICS = 5, + SP_VS_TEX_INSTRUCTIONS = 6, + SP_VS_CFLOW_INSTRUCTIONS = 7, + SP_VS_EFU_INSTRUCTIONS = 8, + SP_VS_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_TEX_INSTRUCTIONS = 11, SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_EFU_INSTRUCTIONS = 13, SP_FS_FULL_ALU_INSTRUCTIONS = 14, - SP0_ICL1_MISSES = 26, + 
SP_FS_HALF_ALU_INSTRUCTIONS = 15, + SP_FS_BARY_INSTRUCTIONS = 16, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, SP_ALU_ACTIVE_CYCLES = 29, + SP_EFU_ACTIVE_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_ACTIVE_CYCLES_ANY = 35, + SP_ACTIVE_CYCLES_ALL = 36, +}; + +enum a3xx_tp_perfcounter_select { + TPL1_TPPERF_L1_REQUESTS = 0, + TPL1_TPPERF_TP0_L1_REQUESTS = 1, + TPL1_TPPERF_TP0_L1_MISSES = 2, + TPL1_TPPERF_TP1_L1_REQUESTS = 3, + TPL1_TPPERF_TP1_L1_MISSES = 4, + TPL1_TPPERF_TP2_L1_REQUESTS = 5, + TPL1_TPPERF_TP2_L1_MISSES = 6, + TPL1_TPPERF_TP3_L1_REQUESTS = 7, + TPL1_TPPERF_TP3_L1_MISSES = 8, + TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, + TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, + TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, + TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, + TPL1_TPPERF_BILINEAR_OPS = 13, + TPL1_TPPERF_QUADSQUADS_OFFSET = 14, + TPL1_TPPERF_QUADQUADS_SHADOW = 15, + TPL1_TPPERF_QUADS_ARRAY = 16, + TPL1_TPPERF_QUADS_PROJECTION = 17, + TPL1_TPPERF_QUADS_GRADIENT = 18, + TPL1_TPPERF_QUADS_1D2D = 19, + TPL1_TPPERF_QUADS_3DCUBE = 20, + TPL1_TPPERF_ZERO_LOD = 21, + TPL1_TPPERF_OUTPUT_TEXELS = 22, + TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, + TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, + TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, + TPL1_TPPERF_LATENCY = 26, + TPL1_TPPERF_LATENCY_TRANS = 27, +}; + +enum a3xx_vfd_perfcounter_select { + VFD_PERF_UCHE_BYTE_FETCHED = 0, + VFD_PERF_UCHE_TRANS = 1, + VFD_PERF_VPC_BYPASS_COMPONENTS = 2, + VFD_PERF_FETCH_INSTRUCTIONS = 3, + VFD_PERF_DECODE_INSTRUCTIONS = 4, + VFD_PERF_ACTIVE_CYCLES = 5, + VFD_PERF_STALL_CYCLES_UCHE = 6, + VFD_PERF_STALL_CYCLES_HLSQ = 7, + VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, + 
VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, +}; + +enum a3xx_vpc_perfcounter_select { + VPC_PERF_SP_LM_PRIMITIVES = 0, + VPC_PERF_COMPONENTS_FROM_SP = 1, + VPC_PERF_SP_LM_COMPONENTS = 2, + VPC_PERF_ACTIVE_CYCLES = 3, + VPC_PERF_STALL_CYCLES_LM = 4, + VPC_PERF_STALL_CYCLES_RAS = 5, +}; + +enum a3xx_uche_perfcounter_select { + UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, + UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, + UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, + UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, + UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, + UCHE_UCHEPERF_READ_REQUESTS_TP = 8, + UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, + UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, + UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, + UCHE_UCHEPERF_READ_REQUESTS_SP = 12, + UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, + UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, + UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, + UCHE_UCHEPERF_EVICTS = 16, + UCHE_UCHEPERF_FLUSHES = 17, + UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, + UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, + UCHE_UCHEPERF_ACTIVE_CYCLES = 20, }; enum a3xx_rb_blend_opcode { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 811f58bbba2..8c37992e17d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "fd3_emit.h" #include "fd3_blend.h" @@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx) fd_wfi(ctx, ring); + fd_hw_query_enable(ctx, ring); + ctx->needs_rb_fbd = true; } diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index e8df429441e..d6fd1bb583e 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - 
/home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -271,6 +272,545 @@ enum a4xx_tess_spacing { EVEN_SPACING = 3, }; +enum a4xx_ccu_perfcounter_select { + CCU_BUSY_CYCLES = 0, + CCU_RB_DEPTH_RETURN_STALL = 2, + CCU_RB_COLOR_RETURN_STALL = 3, + CCU_DEPTH_BLOCKS = 6, + CCU_COLOR_BLOCKS = 7, + CCU_DEPTH_BLOCK_HIT = 8, + 
CCU_COLOR_BLOCK_HIT = 9, + CCU_DEPTH_FLAG1_COUNT = 10, + CCU_DEPTH_FLAG2_COUNT = 11, + CCU_DEPTH_FLAG3_COUNT = 12, + CCU_DEPTH_FLAG4_COUNT = 13, + CCU_COLOR_FLAG1_COUNT = 14, + CCU_COLOR_FLAG2_COUNT = 15, + CCU_COLOR_FLAG3_COUNT = 16, + CCU_COLOR_FLAG4_COUNT = 17, + CCU_PARTIAL_BLOCK_READ = 18, +}; + +enum a4xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_BUSY = 1, + CP_PFP_IDLE = 2, + CP_PFP_BUSY_WORKING = 3, + CP_PFP_STALL_CYCLES_ANY = 4, + CP_PFP_STARVE_CYCLES_ANY = 5, + CP_PFP_STARVED_PER_LOAD_ADDR = 6, + CP_PFP_STALLED_PER_STORE_ADDR = 7, + CP_PFP_PC_PROFILE = 8, + CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + CP_PFP_COND_INDIRECT_DISCARDED = 10, + CP_LONG_RESUMPTIONS = 11, + CP_RESUME_CYCLES = 12, + CP_RESUME_TO_BOUNDARY_CYCLES = 13, + CP_LONG_PREEMPTIONS = 14, + CP_PREEMPT_CYCLES = 15, + CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, + CP_ME_FIFO_EMPTY_PFP_IDLE = 17, + CP_ME_FIFO_EMPTY_PFP_BUSY = 18, + CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, + CP_ME_FIFO_FULL_ME_BUSY = 20, + CP_ME_FIFO_FULL_ME_NON_WORKING = 21, + CP_ME_WAITING_FOR_PACKETS = 22, + CP_ME_BUSY_WORKING = 23, + CP_ME_STARVE_CYCLES_ANY = 24, + CP_ME_STARVE_CYCLES_PER_PROFILE = 25, + CP_ME_STALL_CYCLES_PER_PROFILE = 26, + CP_ME_PC_PROFILE = 27, + CP_RCIU_FIFO_EMPTY = 28, + CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, + CP_RCIU_FIFO_FULL = 30, + CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, + CP_RCIU_FIFO_FULL_AHB_MASTER = 32, + CP_RCIU_FIFO_FULL_OTHER = 33, + CP_AHB_IDLE = 34, + CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, + CP_AHB_STALL_ON_GRANT_SPLIT = 36, + CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, + CP_AHB_BUSY_WORKING = 38, + CP_AHB_BUSY_STALL_ON_HRDY = 39, + CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, +}; + +enum a4xx_gras_ras_perfcounter_select { + RAS_SUPER_TILES = 0, + RAS_8X8_TILES = 1, + RAS_4X4_TILES = 2, + RAS_BUSY_CYCLES = 3, + RAS_STALL_CYCLES_BY_RB = 4, + RAS_STALL_CYCLES_BY_VSC = 5, + RAS_STARVE_CYCLES_BY_TSE = 6, + RAS_SUPERTILE_CYCLES = 7, + RAS_TILE_CYCLES = 8, + RAS_FULLY_COVERED_SUPER_TILES = 9, + 
RAS_FULLY_COVERED_8X8_TILES = 10, + RAS_4X4_PRIM = 11, + RAS_8X4_4X8_PRIM = 12, + RAS_8X8_PRIM = 13, +}; + +enum a4xx_gras_tse_perfcounter_select { + TSE_INPUT_PRIM = 0, + TSE_INPUT_NULL_PRIM = 1, + TSE_TRIVAL_REJ_PRIM = 2, + TSE_CLIPPED_PRIM = 3, + TSE_NEW_PRIM = 4, + TSE_ZERO_AREA_PRIM = 5, + TSE_FACENESS_CULLED_PRIM = 6, + TSE_ZERO_PIXEL_PRIM = 7, + TSE_OUTPUT_NULL_PRIM = 8, + TSE_OUTPUT_VISIBLE_PRIM = 9, + TSE_PRE_CLIP_PRIM = 10, + TSE_POST_CLIP_PRIM = 11, + TSE_BUSY_CYCLES = 12, + TSE_PC_STARVE = 13, + TSE_RAS_STALL = 14, + TSE_STALL_BARYPLANE_FIFO_FULL = 15, + TSE_STALL_ZPLANE_FIFO_FULL = 16, +}; + +enum a4xx_hlsq_perfcounter_select { + HLSQ_SP_VS_STAGE_CONSTANT = 0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, + HLSQ_SP_FS_STAGE_CONSTANT = 2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, + HLSQ_TP_STATE = 4, + HLSQ_QUADS = 5, + HLSQ_PIXELS = 6, + HLSQ_VERTICES = 7, + HLSQ_SP_VS_STAGE_DATA_BYTES = 13, + HLSQ_SP_FS_STAGE_DATA_BYTES = 14, + HLSQ_BUSY_CYCLES = 15, + HLSQ_STALL_CYCLES_SP_STATE = 16, + HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, + HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, + HLSQ_STALL_CYCLES_UCHE = 19, + HLSQ_RBBM_LOAD_CYCLES = 20, + HLSQ_DI_TO_VS_START_SP = 21, + HLSQ_DI_TO_FS_START_SP = 22, + HLSQ_VS_STAGE_START_TO_DONE_SP = 23, + HLSQ_FS_STAGE_START_TO_DONE_SP = 24, + HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, + HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, + HLSQ_UCHE_LATENCY_CYCLES = 27, + HLSQ_UCHE_LATENCY_COUNT = 28, + HLSQ_STARVE_CYCLES_VFD = 29, +}; + +enum a4xx_pc_perfcounter_select { + PC_VIS_STREAMS_LOADED = 0, + PC_VPC_PRIMITIVES = 2, + PC_DEAD_PRIM = 3, + PC_LIVE_PRIM = 4, + PC_DEAD_DRAWCALLS = 5, + PC_LIVE_DRAWCALLS = 6, + PC_VERTEX_MISSES = 7, + PC_STALL_CYCLES_VFD = 9, + PC_STALL_CYCLES_TSE = 10, + PC_STALL_CYCLES_UCHE = 11, + PC_WORKING_CYCLES = 12, + PC_IA_VERTICES = 13, + PC_GS_PRIMITIVES = 14, + PC_HS_INVOCATIONS = 15, + PC_DS_INVOCATIONS = 16, + PC_DS_PRIMITIVES = 17, + PC_STARVE_CYCLES_FOR_INDEX = 20, + PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, + 
PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, + PC_STALL_CYCLES_TESS = 23, + PC_STARVE_CYCLES_FOR_POSITION = 24, + PC_MODE0_DRAWCALL = 25, + PC_MODE1_DRAWCALL = 26, + PC_MODE2_DRAWCALL = 27, + PC_MODE3_DRAWCALL = 28, + PC_MODE4_DRAWCALL = 29, + PC_PREDICATED_DEAD_DRAWCALL = 30, + PC_STALL_CYCLES_BY_TSE_ONLY = 31, + PC_STALL_CYCLES_BY_VPC_ONLY = 32, + PC_VPC_POS_DATA_TRANSACTION = 33, + PC_BUSY_CYCLES = 34, + PC_STARVE_CYCLES_DI = 35, + PC_STALL_CYCLES_VPC = 36, + TESS_WORKING_CYCLES = 37, + TESS_NUM_CYCLES_SETUP_WORKING = 38, + TESS_NUM_CYCLES_PTGEN_WORKING = 39, + TESS_NUM_CYCLES_CONNGEN_WORKING = 40, + TESS_BUSY_CYCLES = 41, + TESS_STARVE_CYCLES_PC = 42, + TESS_STALL_CYCLES_PC = 43, +}; + +enum a4xx_pwr_perfcounter_select { + PWR_CORE_CLOCK_CYCLES = 0, + PWR_BUSY_CLOCK_CYCLES = 1, +}; + +enum a4xx_rb_perfcounter_select { + RB_BUSY_CYCLES = 0, + RB_BUSY_CYCLES_BINNING = 1, + RB_BUSY_CYCLES_RENDERING = 2, + RB_BUSY_CYCLES_RESOLVE = 3, + RB_STARVE_CYCLES_BY_SP = 4, + RB_STARVE_CYCLES_BY_RAS = 5, + RB_STARVE_CYCLES_BY_MARB = 6, + RB_STALL_CYCLES_BY_MARB = 7, + RB_STALL_CYCLES_BY_HLSQ = 8, + RB_RB_RB_MARB_DATA = 9, + RB_SP_RB_QUAD = 10, + RB_RAS_RB_Z_QUADS = 11, + RB_GMEM_CH0_READ = 12, + RB_GMEM_CH1_READ = 13, + RB_GMEM_CH0_WRITE = 14, + RB_GMEM_CH1_WRITE = 15, + RB_CP_CONTEXT_DONE = 16, + RB_CP_CACHE_FLUSH = 17, + RB_CP_ZPASS_DONE = 18, + RB_STALL_FIFO0_FULL = 19, + RB_STALL_FIFO1_FULL = 20, + RB_STALL_FIFO2_FULL = 21, + RB_STALL_FIFO3_FULL = 22, + RB_RB_HLSQ_TRANSACTIONS = 23, + RB_Z_READ = 24, + RB_Z_WRITE = 25, + RB_C_READ = 26, + RB_C_WRITE = 27, + RB_C_READ_LATENCY = 28, + RB_Z_READ_LATENCY = 29, + RB_STALL_BY_UCHE = 30, + RB_MARB_UCHE_TRANSACTIONS = 31, + RB_CACHE_STALL_MISS = 32, + RB_CACHE_STALL_FIFO_FULL = 33, + RB_8BIT_BLENDER_UNITS_ACTIVE = 34, + RB_16BIT_BLENDER_UNITS_ACTIVE = 35, + RB_SAMPLER_UNITS_ACTIVE = 36, + RB_TOTAL_PASS = 38, + RB_Z_PASS = 39, + RB_Z_FAIL = 40, + RB_S_FAIL = 41, + RB_POWER0 = 42, + RB_POWER1 = 43, + RB_POWER2 = 44, + RB_POWER3 = 45, + 
RB_POWER4 = 46, + RB_POWER5 = 47, + RB_POWER6 = 48, + RB_POWER7 = 49, +}; + +enum a4xx_rbbm_perfcounter_select { + RBBM_ALWAYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TPL1_BUSY = 12, + RBBM_ANY_SP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_STATUS_MASKED = 22, + RBBM_CP_BUSY_GFX_CORE_IDLE = 23, + RBBM_TESS_BUSY = 24, + RBBM_COM_BUSY = 25, + RBBM_DCOM_BUSY = 32, + RBBM_ANY_CCU_BUSY = 33, + RBBM_DPM_BUSY = 34, +}; + +enum a4xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_GM_LOAD_INSTRUCTIONS = 3, + SP_GM_STORE_INSTRUCTIONS = 4, + SP_GM_ATOMICS = 5, + SP_VS_STAGE_TEX_INSTRUCTIONS = 6, + SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, + SP_VS_STAGE_EFU_INSTRUCTIONS = 8, + SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_STAGE_TEX_INSTRUCTIONS = 11, + SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, + SP_FS_STAGE_EFU_INSTRUCTIONS = 13, + SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_WORKING_CYCLES = 29, + SP_EFU_WORKING_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_BUSY_CYCLES = 35, + SP_HS_INSTRUCTIONS = 
36, + SP_DS_INSTRUCTIONS = 37, + SP_GS_INSTRUCTIONS = 38, + SP_CS_INSTRUCTIONS = 39, + SP_SCHEDULER_NON_WORKING = 40, + SP_WAVE_CONTEXTS = 41, + SP_WAVE_CONTEXT_CYCLES = 42, + SP_POWER0 = 43, + SP_POWER1 = 44, + SP_POWER2 = 45, + SP_POWER3 = 46, + SP_POWER4 = 47, + SP_POWER5 = 48, + SP_POWER6 = 49, + SP_POWER7 = 50, + SP_POWER8 = 51, + SP_POWER9 = 52, + SP_POWER10 = 53, + SP_POWER11 = 54, + SP_POWER12 = 55, + SP_POWER13 = 56, + SP_POWER14 = 57, + SP_POWER15 = 58, +}; + +enum a4xx_tp_perfcounter_select { + TP_L1_REQUESTS = 0, + TP_L1_MISSES = 1, + TP_QUADS_OFFSET = 8, + TP_QUAD_SHADOW = 9, + TP_QUADS_ARRAY = 10, + TP_QUADS_GRADIENT = 11, + TP_QUADS_1D2D = 12, + TP_QUADS_3DCUBE = 13, + TP_BUSY_CYCLES = 16, + TP_STALL_CYCLES_BY_ARB = 17, + TP_STATE_CACHE_REQUESTS = 20, + TP_STATE_CACHE_MISSES = 21, + TP_POWER0 = 22, + TP_POWER1 = 23, + TP_POWER2 = 24, + TP_POWER3 = 25, + TP_POWER4 = 26, + TP_POWER5 = 27, + TP_POWER6 = 28, + TP_POWER7 = 29, +}; + +enum a4xx_uche_perfcounter_select { + UCHE_VBIF_READ_BEATS_TP = 0, + UCHE_VBIF_READ_BEATS_VFD = 1, + UCHE_VBIF_READ_BEATS_HLSQ = 2, + UCHE_VBIF_READ_BEATS_MARB = 3, + UCHE_VBIF_READ_BEATS_SP = 4, + UCHE_READ_REQUESTS_TP = 5, + UCHE_READ_REQUESTS_VFD = 6, + UCHE_READ_REQUESTS_HLSQ = 7, + UCHE_READ_REQUESTS_MARB = 8, + UCHE_READ_REQUESTS_SP = 9, + UCHE_WRITE_REQUESTS_MARB = 10, + UCHE_WRITE_REQUESTS_SP = 11, + UCHE_TAG_CHECK_FAILS = 12, + UCHE_EVICTS = 13, + UCHE_FLUSHES = 14, + UCHE_VBIF_LATENCY_CYCLES = 15, + UCHE_VBIF_LATENCY_SAMPLES = 16, + UCHE_BUSY_CYCLES = 17, + UCHE_VBIF_READ_BEATS_PC = 18, + UCHE_READ_REQUESTS_PC = 19, + UCHE_WRITE_REQUESTS_VPC = 20, + UCHE_STALL_BY_VBIF = 21, + UCHE_WRITE_REQUESTS_VSC = 22, + UCHE_POWER0 = 23, + UCHE_POWER1 = 24, + UCHE_POWER2 = 25, + UCHE_POWER3 = 26, + UCHE_POWER4 = 27, + UCHE_POWER5 = 28, + UCHE_POWER6 = 29, + UCHE_POWER7 = 30, +}; + +enum a4xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + 
AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + 
AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, + CYCLES_HELD_OFF_ID_0 = 86, + CYCLES_HELD_OFF_ID_1 = 87, + CYCLES_HELD_OFF_ID_2 = 88, + CYCLES_HELD_OFF_ID_3 = 89, + CYCLES_HELD_OFF_ID_4 = 90, + CYCLES_HELD_OFF_ID_5 = 91, + CYCLES_HELD_OFF_ID_6 = 92, + CYCLES_HELD_OFF_ID_7 = 93, + CYCLES_HELD_OFF_ID_8 = 94, + CYCLES_HELD_OFF_ID_9 = 95, + CYCLES_HELD_OFF_ID_10 = 96, + CYCLES_HELD_OFF_ID_11 = 97, + CYCLES_HELD_OFF_ID_12 = 98, + CYCLES_HELD_OFF_ID_13 = 99, + CYCLES_HELD_OFF_ID_14 = 100, + CYCLES_HELD_OFF_ID_15 = 101, + AXI_READ_REQUEST_HELD_OFF = 102, + AXI_WRITE_REQUEST_HELD_OFF = 103, + AXI_REQUEST_HELD_OFF = 104, + AXI_WRITE_DATA_HELD_OFF = 105, + OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, + OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, + OCMEM_AXI_REQUEST_HELD_OFF = 108, + OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, + ELAPSED_CYCLES_DDR = 110, + ELAPSED_CYCLES_OCMEM = 111, +}; + +enum a4xx_vfd_perfcounter_select { + VFD_UCHE_BYTE_FETCHED = 0, + VFD_UCHE_TRANS = 1, + VFD_FETCH_INSTRUCTIONS = 3, + VFD_BUSY_CYCLES = 5, + VFD_STALL_CYCLES_UCHE = 6, + VFD_STALL_CYCLES_HLSQ = 7, + VFD_STALL_CYCLES_VPC_BYPASS = 8, + VFD_STALL_CYCLES_VPC_ALLOC = 9, + VFD_MODE_0_FIBERS = 13, + VFD_MODE_1_FIBERS = 14, + VFD_MODE_2_FIBERS = 15, + VFD_MODE_3_FIBERS = 16, + VFD_MODE_4_FIBERS = 17, + VFD_BFIFO_STALL 
= 18, + VFD_NUM_VERTICES_TOTAL = 19, + VFD_PACKER_FULL = 20, + VFD_UCHE_REQUEST_FIFO_FULL = 21, + VFD_STARVE_CYCLES_PC = 22, + VFD_STARVE_CYCLES_UCHE = 23, +}; + +enum a4xx_vpc_perfcounter_select { + VPC_SP_LM_COMPONENTS = 2, + VPC_SP0_LM_BYTES = 3, + VPC_SP1_LM_BYTES = 4, + VPC_SP2_LM_BYTES = 5, + VPC_SP3_LM_BYTES = 6, + VPC_WORKING_CYCLES = 7, + VPC_STALL_CYCLES_LM = 8, + VPC_STARVE_CYCLES_RAS = 9, + VPC_STREAMOUT_CYCLES = 10, + VPC_UCHE_TRANSACTIONS = 12, + VPC_STALL_CYCLES_UCHE = 13, + VPC_BUSY_CYCLES = 14, + VPC_STARVE_CYCLES_SP = 15, +}; + +enum a4xx_vsc_perfcounter_select { + VSC_BUSY_CYCLES = 0, + VSC_WORKING_CYCLES = 1, + VSC_STALL_CYCLES_UCHE = 2, + VSC_STARVE_CYCLES_RAS = 3, + VSC_EOT_NUM = 4, +}; + enum a4xx_tex_filter { A4XX_TEX_NEAREST = 0, A4XX_TEX_LINEAR = 1, @@ -357,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) #define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce +#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 + #define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 #define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 @@ -1070,6 +1616,380 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x #define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c +#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d + +#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e + +#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f + +#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 + +#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 
0x000000a9 + +#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa + +#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 + +#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba + +#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb + +#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc + +#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd + +#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be + +#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf + +#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 + +#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce + +#define 
REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 + +#define 
REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 + +#define 
REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 + +#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a + +#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b + +#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c + +#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d + +#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e + +#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f + +#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 + +#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 + +#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a + +#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b + +#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c + +#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d + +#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e + +#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f + +#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 + +#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 + +#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a + +#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b + +#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c + +#define 
REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d + +#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e + +#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f + +#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 + +#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 + +#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a + +#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f + static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } @@ -1136,6 +2056,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) #define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 + #define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a #define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d @@ -1272,6 +2200,20 @@ 
static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 #define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 +#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 + #define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } @@ -1802,6 +2744,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64 +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 + #define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 #define REG_A4XX_VPC_ATTR 0x00002140 @@ -1914,6 +2862,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0 #define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 + #define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a #define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 @@ -2070,6 +3032,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) #define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 + +#define 
REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a + #define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b #define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 @@ -2124,8 +3100,20 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a + #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f + #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 #define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 #define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 @@ -2391,6 +3379,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 + #define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 #define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 @@ -2401,6 +3403,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a + +#define 
REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d + #define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 @@ -2655,6 +3673,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) #define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 +#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + #define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 #define REG_A4XX_PC_BIN_BASE 0x000021c0 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 074c5a752bf..0c1027d5804 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -49,6 +49,8 @@ struct fd4_context { /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We * could combine it with another allocation. + * + * (upper area used as scratch bo.. 
see fd4_query) */ struct fd_bo *vsc_size_mem; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 4a3f1da30ed..72154bf286a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "fd4_emit.h" #include "fd4_blend.h" @@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x0); + fd_hw_query_enable(ctx, ring); + ctx->needs_rb_fbd = true; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c index 4f69e0c1694..14a809431ac 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c @@ -31,6 +31,7 @@ #include "freedreno_util.h" #include "fd4_query.h" +#include "fd4_context.h" #include "fd4_draw.h" #include "fd4_format.h" @@ -81,7 +82,12 @@ static uint64_t count_samples(const struct fd_rb_samp_ctrs *start, const struct fd_rb_samp_ctrs *end) { - return end->ctr[0] - start->ctr[0]; + uint64_t n = 0; + + for (unsigned i = 0; i < 16; i += 4) + n += end->ctr[i] - start->ctr[i]; + + return n / 2; } static void @@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx, result->b |= (n > 0); } +/* + * Time Elapsed Query: + * + * Note: we could in theory support timestamp queries, but they + * won't give sensible results for tilers. + */ + +static void +time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + /* Right now, the assignment of countable to counter register is + * just hard coded. If we start exposing more countables than we + * have counters, we will need to be more clever. 
+ */ + fd_wfi(ctx, ring); + OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1); + OUT_RING(ring, CP_ALWAYS_COUNT); +} + +static struct fd_hw_sample * +time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t)); + + /* use unused part of vsc_size_mem as scratch space, to avoid + * extra allocation: + */ + struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem; + const int sample_off = 128; + const int addr_off = sample_off + 8; + + debug_assert(ctx->screen->max_freq > 0); + + /* Basic issue is that we need to read counter value to a relative + * destination (with per-tile offset) rather than absolute dest + * addr. But there is no pm4 packet that can do that. This is + * where it would be *really* nice if we could write our own fw + * since afaict implementing the sort of packet we need would be + * trivial. + * + * Instead, we: + * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer + * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer + * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base + * address to the per-sample offset in the scratch buffer + * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3 + * to CP_ME_NRT_ADDR + * (5) CP_MEM_TO_REG's to copy saved counter value from scratch + * buffer to CP_ME_NRT_DATA to trigger the write out to query + * result buffer + * + * Straightforward, right? + * + * Maybe could swap the order of things in the scratch buffer to + * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one + * shot, but that's really just polishing a turd.. + */ + + fd_wfi(ctx, ring); + + /* copy sample counter _LO and _HI to scratch: */ + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) | + CP_REG_TO_MEM_0_64B | + CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */ + OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); + + /* ok... 
here we really *would* like to use the CP_SET_CONSTANT + * mode which can add a constant to value in reg2 and write to + * reg1... *but* that only works for banked/context registers, + * and CP_ME_NRT_DATA isn't one of those.. so we need to do some + * CP math to the scratch buffer instead: + * + * (note first 8 bytes are counter value, use offset 0x8 for + * address calculation) + */ + + /* per-sample offset to scratch bo: */ + OUT_PKT3(ring, CP_MEM_WRITE, 2); + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + OUT_RING(ring, samp->offset); + + /* now add to that the per-tile base: */ + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) | + CP_REG_TO_MEM_0_ACCUMULATE | + CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */ + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + + /* now copy that back to CP_ME_NRT_ADDR: */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR); + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + + /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA + * to trigger the write to result buffer + */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); + OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); + + /* and again to get the value of the _HI reg from scratch: */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); + OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0); + + /* Sigh.. 
*/ + + return samp; +} + +static void +time_elapsed_accumulate_result(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result) +{ + uint64_t n = *(uint64_t *)end - *(uint64_t *)start; + /* max_freq is in Hz, convert cycle count to ns: */ + result->u64 += n * 1000000000 / ctx->screen->max_freq; +} + static const struct fd_hw_sample_provider occlusion_counter = { .query_type = PIPE_QUERY_OCCLUSION_COUNTER, .active = FD_STAGE_DRAW, @@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = { .accumulate_result = occlusion_predicate_accumulate_result, }; +static const struct fd_hw_sample_provider time_elapsed = { + .query_type = PIPE_QUERY_TIME_ELAPSED, + .active = FD_STAGE_DRAW, + .enable = time_elapsed_enable, + .get_sample = time_elapsed_get_sample, + .accumulate_result = time_elapsed_accumulate_result, +}; + void fd4_query_context_init(struct pipe_context *pctx) { fd_hw_query_register_provider(pctx, &occlusion_counter); fd_hw_query_register_provider(pctx, &occlusion_predicate); + fd_hw_query_register_provider(pctx, &time_elapsed); } diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index f9c0e6aaa83..ac5343f1a78 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index c6741890c69..09b26a253f0 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 
17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets { CP_UNKNOWN_1A = 26, CP_UNKNOWN_4E = 78, CP_WIDE_REG_WRITE = 116, + CP_SCRATCH_TO_REG = 77, + CP_REG_TO_SCRATCH = 74, + CP_WAIT_MEM_WRITES = 18, + CP_COND_REG_EXEC = 71, + CP_MEM_TO_REG = 66, IN_IB_PREFETCH_END = 23, IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; } +#define REG_CP_REG_TO_MEM_0 0x00000000 +#define 
CP_REG_TO_MEM_0_REG__MASK 0x0000ffff +#define CP_REG_TO_MEM_0_REG__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; +} +#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 +#define CP_REG_TO_MEM_0_CNT__SHIFT 19 +static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; +} +#define CP_REG_TO_MEM_0_64B 0x40000000 +#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 + +#define REG_CP_REG_TO_MEM_1 0x00000001 +#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff +#define CP_REG_TO_MEM_1_DEST__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; +} + #endif /* ADRENO_PM4_XML */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 9e7130ab915..85ce97c16b7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -164,6 +164,9 @@ struct fd_context { */ struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS]; + /* which sample providers were active in the current batch: */ + uint32_t active_providers; + /* tracking for current stage, to know when to start/stop * any active queries: */ diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index c2c71da2790..1e4f45ffcd3 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq) void fd_query_screen_init(struct pipe_screen *pscreen); void fd_query_context_init(struct pipe_context *pctx); +static inline bool +skip_begin_query(int type) +{ + switch (type) { + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_GPU_FINISHED: + return true; + default: + return false; + } +} + #endif /* 
FREEDRENO_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 027fdc9de23..2ac03f22b41 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -47,6 +47,8 @@ static int pidx(unsigned query_type) return 0; case PIPE_QUERY_OCCLUSION_PREDICATE: return 1; + case PIPE_QUERY_TIME_ELAPSED: + return 2; default: return -1; } @@ -89,7 +91,9 @@ static void resume_query(struct fd_context *ctx, struct fd_hw_query *hq, struct fd_ringbuffer *ring) { + int idx = pidx(hq->provider->query_type); assert(!hq->period); + ctx->active_providers |= (1 << idx); hq->period = util_slab_alloc(&ctx->sample_period_pool); list_inithead(&hq->period->list); hq->period->start = get_sample(ctx, ring, hq->base.type); @@ -101,7 +105,9 @@ static void pause_query(struct fd_context *ctx, struct fd_hw_query *hq, struct fd_ringbuffer *ring) { + int idx = pidx(hq->provider->query_type); assert(hq->period && !hq->period->end); + assert(ctx->active_providers & (1 << idx)); hq->period->end = get_sample(ctx, ring, hq->base.type); list_addtail(&hq->period->list, &hq->current_periods); hq->period = NULL; @@ -156,6 +162,12 @@ static void fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) { struct fd_hw_query *hq = fd_hw_query(q); + /* there are a couple special cases, which don't have + * a matching ->begin_query(): + */ + if (skip_begin_query(q->type) && !q->active) { + fd_hw_begin_query(ctx, q); + } if (!q->active) return; if (is_active(hq, ctx->stage)) @@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size) struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool); pipe_reference_init(&samp->reference, 1); samp->size = size; + debug_assert(util_is_power_of_two(size)); + ctx->next_sample_offset = align(ctx->next_sample_offset, size); samp->offset = ctx->next_sample_offset; /* NOTE: util_slab_alloc() does not zero out the buffer: 
*/ samp->bo = NULL; @@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo, assert(samp->tile_stride == tile_stride); return; } - samp->bo = bo; + samp->bo = fd_bo_ref(bo); samp->num_tiles = num_tiles; samp->tile_stride = tile_stride; } @@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->stage = stage; } +/* call the provider->enable() for all the hw queries that were active + * in the current batch. This sets up perfctr selector regs statically + * for the duration of the batch. + */ +void +fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) { + if (ctx->active_providers & (1 << idx)) { + assert(ctx->sample_providers[idx]); + if (ctx->sample_providers[idx]->enable) + ctx->sample_providers[idx]->enable(ctx, ring); + } + } + ctx->active_providers = 0; /* clear it for next frame */ +} + void fd_hw_query_register_provider(struct pipe_context *pctx, const struct fd_hw_sample_provider *provider) diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h index 8f4b1f58ee5..8a5d114d806 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.h +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h @@ -76,6 +76,11 @@ struct fd_hw_sample_provider { /* stages applicable to the query type: */ enum fd_render_stage active; + /* Optional hook for enabling a counter. Guaranteed to happen + * at least once before the first ->get_sample() in a batch. 
+ */ + void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring); + /* when a new sample is required, emit appropriate cmdstream * and return a sample object: */ @@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, struct fd_ringbuffer *ring); void fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_render_stage stage); +void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring); void fd_hw_query_register_provider(struct pipe_context *pctx, const struct fd_hw_sample_provider *provider); void fd_hw_query_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 27f4d267438..2b3ecfe664e 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return is_a3xx(screen) ? 1 : 0; /* Queries. 
*/ - case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_OCCLUSION_QUERY: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_QUERY_TIME_ELAPSED: + /* only a4xx, requires new enough kernel so we know max_freq: */ + return (screen->max_freq > 0) && is_a4xx(screen); case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; } debug_printf("unknown shader param %d\n", param); @@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev) } screen->device_id = val; + if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) { + DBG("could not get gpu freq"); + /* this limits what performance related queries are + * supported but is not fatal + */ + screen->max_freq = 0; + } else { + screen->max_freq = val; + } + if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) { DBG("could not get gpu-id"); goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 8fb096a10dd..a81c7786390 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -56,6 +56,7 @@ struct fd_screen { uint32_t device_id; uint32_t gpu_id; /* 220, 305, etc */ uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ + uint32_t max_freq; uint32_t max_rts; /* max # of render targets */ void *compiler; /* currently unused for a2xx */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index ffa75775505..7a1812f2518 100644 --- 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) struct ir3_block *b = ctx->block; struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy; - struct ir3_instruction *const_off[4]; bool has_bias = false, has_lod = false, has_proj = false, has_off = false; unsigned i, coords, flags; unsigned nsrc0 = 0, nsrc1 = 0; diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c b/src/gallium/drivers/ilo/ilo_gpgpu.c index b7415901a88..ab165b6d43b 100644 --- a/src/gallium/drivers/ilo/ilo_gpgpu.c +++ b/src/gallium/drivers/ilo/ilo_gpgpu.c @@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo, } static void -ilo_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) +ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct ilo_context *ilo = ilo_context(pipe); struct ilo_shader_state *cs = ilo->state_vector.cs; @@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe, input_buf.buffer_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE); if (input_buf.buffer_size) { - u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input, + u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input, &input_buf.buffer_offset, &input_buf.buffer); } ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder); - launch_grid(ilo, block_layout, grid_layout, &input_buf, pc); + launch_grid(ilo, info->block, info->grid, &input_buf, info->pc); ilo_render_invalidate_hw(ilo->render); diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 44d7c11af43..ef9da6b8315 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, 
unsigned shader, return ILO_MAX_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 8dc2d38e039..f8d2637cc6f 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader, static void ilo_set_shader_images(struct pipe_context *pipe, unsigned shader, unsigned start, unsigned count, - struct pipe_image_view **views) + struct pipe_image_view *views) { #if 0 struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d22e50777fa..9e56c962d2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads ) create_rast_threads(rast); /* for synchronizing rasterization threads */ - pipe_barrier_init( &rast->barrier, rast->num_threads ); + if (rast->num_threads > 0) { + pipe_barrier_init( &rast->barrier, rast->num_threads ); + } memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); @@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) } /* for synchronizing rasterization threads */ - pipe_barrier_destroy( &rast->barrier ); + if (rast->num_threads > 0) { + pipe_barrier_destroy( &rast->barrier ); + } lp_scene_queue_destroy(rast->full_scenes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5ab297d7e1a..97146912704 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -169,8 +169,8 @@ struct lp_setup_context }; static inline void -scissor_planes_needed(boolean 
scis_planes[4], struct u_rect *bbox, - struct u_rect *scissor) +scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox, + const struct u_rect *scissor) { /* left */ scis_planes[0] = (bbox->x0 < scissor->x0); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index af4e7900d3c..018130c3192 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup, */ if (nr_planes > 4) { /* why not just use draw_regions */ - struct u_rect *scissor = &setup->scissors[viewport_index]; + const struct u_rect *scissor = &setup->scissors[viewport_index]; struct lp_rast_plane *plane_s = &plane[4]; boolean s_planes[4]; scissor_planes_needed(s_planes, &bbox, scissor); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index cdb3d015dec..29aee726941 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup, */ if (nr_planes > 3) { /* why not just use draw_regions */ - struct u_rect *scissor = &setup->scissors[viewport_index]; + const struct u_rect *scissor = &setup->scissors[viewport_index]; struct lp_rast_plane *plane_s = &plane[3]; boolean s_planes[4]; scissor_planes_needed(s_planes, &bbox, scissor); diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 31a93659647..43ffce63a25 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -60,6 +60,8 @@ NV30_C_SOURCES := \ nv30/nvfx_vertprog.c NV50_C_SOURCES := \ + nv50/g80_defs.xml.h \ + nv50/g80_texture.xml.h \ nv50/nv50_2d.xml.h \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ @@ -68,7 +70,6 @@ NV50_C_SOURCES := \ nv50/nv50_compute.xml.h \ nv50/nv50_context.c \ nv50/nv50_context.h \ 
- nv50/nv50_defs.xml.h \ nv50/nv50_formats.c \ nv50/nv50_miptree.c \ nv50/nv50_program.c \ @@ -93,7 +94,6 @@ NV50_C_SOURCES := \ nv50/nv50_state_validate.c \ nv50/nv50_surface.c \ nv50/nv50_tex.c \ - nv50/nv50_texture.xml.h \ nv50/nv50_transfer.c \ nv50/nv50_transfer.h \ nv50/nv50_vbo.c \ @@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \ codegen/nv50_ir_target_nvc0.h NVC0_C_SOURCES := \ + nvc0/gm107_texture.xml.h \ nvc0/nvc0_3d.xml.h \ nvc0/nvc0_compute.c \ nvc0/nvc0_compute.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 9d7becf27d4..97ebed455b6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -232,6 +232,8 @@ enum operation #define NV50_IR_SUBOP_SHFL_UP 1 #define NV50_IR_SUBOP_SHFL_DOWN 2 #define NV50_IR_SUBOP_SHFL_BFLY 3 +#define NV50_IR_SUBOP_LOAD_LOCKED 1 +#define NV50_IR_SUBOP_STORE_UNLOCKED 2 #define NV50_IR_SUBOP_MADSP_SD 0xffff // Yes, we could represent those with DataType. // Or put the type into operation and have a couple 1000 values in that enum. diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 0c7cd1d8137..a78b3f954a4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, srcId(i->src(s), s ? ((s == 2) ? 
42 : s1) : 10); break; default: + if (i->op == OP_SELP) { + assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE); + srcId(i->src(s), 42); + } // ignore here, can be predicate or flags, but must not be address break; } @@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) { emitForm_21(i, 0x250, 0x050); - if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT))) + if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13; } @@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask defId(i->def(0), 2); srcId(i->src(0), 10); - srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23); + srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) code[1] |= 1 << 9; // dall diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index dee26225b7e..93c40d15e46 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -193,6 +193,8 @@ private: void emitNOP(); void emitKIL(); void emitOUT(); + + void emitMEMBAR(); }; /******************************************************************************* @@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) case SV_INVOCATION_ID : id = 0x11; break; case SV_THREAD_KILL : id = 0x13; break; case SV_INVOCATION_INFO: id = 0x1d; break; + case SV_TID : id = 0x21 + val->reg.data.sv.index; break; + case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; default: assert(!"invalid system value"); id = 0; @@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD() emitRND (0x27); emitField(0x26, 1, insn->lanes); /* abused for .ndv */ emitField(0x1c, 8, insn->subOp); - emitGPR (0x14, insn->src(1)); + if (insn->predSrc != 1) + emitGPR (0x14, insn->src(1)); + else + emitGPR (0x14); emitGPR (0x08, insn->src(0)); 
emitGPR (0x00, insn->def(0)); } @@ -2327,22 +2334,34 @@ void CodeEmitterGM107::emitATOM() { unsigned dType, subOp; - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_S32: dType = 1; break; - case TYPE_U64: dType = 2; break; - case TYPE_F32: dType = 3; break; - case TYPE_B128: dType = 4; break; - case TYPE_S64: dType = 5; break; - default: assert(!"unexpected dType"); dType = 0; break; - } - if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) - subOp = 8; - else - subOp = insn->subOp; - assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */ - emitInsn (0xed000000); + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_U64: dType = 1; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + subOp = 15; + + emitInsn (0xee000000); + } else { + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_S32: dType = 1; break; + case TYPE_U64: dType = 2; break; + case TYPE_F32: dType = 3; break; + case TYPE_B128: dType = 4; break; + case TYPE_S64: dType = 5; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) + subOp = 8; + else + subOp = insn->subOp; + + emitInsn (0xed000000); + } + emitField(0x34, 4, subOp); emitField(0x31, 3, dType); emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); @@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT() emitGPR (0x00, insn->def(0)); } +void +CodeEmitterGM107::emitMEMBAR() +{ + emitInsn (0xef980000); + emitField(0x08, 2, insn->subOp >> 2); +} + /******************************************************************************* * assembler front-end ******************************************************************************/ @@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_RESTART: emitOUT(); break; + case OP_MEMBAR: + emitMEMBAR(); + break; default: assert(!"invalid opcode"); emitNOP(); diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index bc8354deba1..682a19d6d78 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i) setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); setSrc(i, 0, 0); - setSrc(i, 1, 2); + if (i->predSrc != 1) + setSrc(i, 1, 2); if (i->getIndirect(0, 0)) { assert(!i->getIndirect(1, 0)); @@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) emitForm_ADD(i); - if (!i->srcExists(1)) + if (!i->srcExists(1) || i->predSrc == 1) srcId(i->src(0), 32 + 14); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 8637db91521..0068da5cbb7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); break; default: + if (i->op == OP_SELP) { + // OP_SELP is used to implement shared+atomics on Fermi. + assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE); + srcId(i->src(s), 49); + } // ignore here, can be predicate or flags, but must not be address break; } @@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i) { emitForm_A(i, HEX64(20000000, 00000004)); - if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) + if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20; } @@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) defId(i->def(0), 14); srcId(i->src(0), 20); - srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); + srcId((i->srcExists(1) && i->predSrc != 1) ? 
i->src(1) : i->src(0), 26); if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) code[0] |= 1 << 9; // dall @@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x90000000; break; case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; - case FILE_MEMORY_SHARED: opc = 0xc9000000; break; + case FILE_MEMORY_SHARED: + if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + opc = 0xb8000000; + else + opc = 0xcc000000; + } else { + opc = 0xc9000000; + } + break; default: assert(!"invalid memory file"); opc = 0; @@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) code[0] = 0x00000005; code[1] = opc; + if (targ->getChipset() >= NVISA_GK104_CHIPSET) { + // Unlocked store on shared memory can fail. + if (i->src(0).getFile() == FILE_MEMORY_SHARED && + i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { + assert(i->defExists(0)); + defId(i->def(0), 8); + } + } + setAddressByFile(i->src(0)); srcId(i->src(1), 14); srcId(i->src(0).getIndirect(0), 20); @@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; - case FILE_MEMORY_SHARED: opc = 0xc1000000; break; + case FILE_MEMORY_SHARED: + if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + opc = 0xa8000000; + else + opc = 0xc4000000; + } else { + opc = 0xc1000000; + } + break; case FILE_MEMORY_CONST: if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { emitMOV(i); // not sure if this is any better @@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) } code[1] = opc; + if (i->src(0).getFile() == FILE_MEMORY_SHARED) { + if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + assert(i->defExists(1)); + defId(i->def(1), 32 + 18); + } + } + defId(i->def(0), 14); setAddressByFile(i->src(0)); diff 
--git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 52ac198221d..d06e9efa463 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -858,6 +859,11 @@ public: }; std::vector<Resource> resources; + struct MemoryFile { + bool shared; + }; + std::vector<MemoryFile> memoryFiles; + private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); @@ -904,6 +910,7 @@ bool Source::scanSource() textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); + memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); info->immd.bufSize = 0; @@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) for (i = first; i <= last; ++i) textureViews[i].target = decl->SamplerView.Resource; break; + case TGSI_FILE_MEMORY: + for (i = first; i <= last; ++i) + memoryFiles[i].shared = decl->Declaration.Shared; + break; + case TGSI_FILE_NULL: case TGSI_FILE_TEMPORARY: for (i = first; i <= last; ++i) tempArrayId[i] = arrayId; @@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; - case TGSI_FILE_NULL: case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: @@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int 
fileIdx, int idx, int c, uint32_t address) sym->reg.fileIndex = fileIdx; + if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared) + sym->setFile(FILE_MEMORY_SHARED); + if (idx >= 0) { if (sym->reg.file == FILE_SHADER_INPUT) sym->setOffset(info->in[idx].slot[c] * 4); @@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c) int idx = dst.getIndex(0); int idx2d = dst.is2D() ? dst.getIndex(1) : 0; - if (dst.isMasked(c) || f == TGSI_FILE_BUFFER) + if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY) return NULL; if (dst.isIndirect(0) || @@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4]) int c; std::vector<Value *> off, src, ldv, def; - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4]) Symbol *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(0, info) + 4 * c); } else { - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c); } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); @@ -2337,7 +2353,8 @@ Converter::handleSTORE() int c; std::vector<Value *> off, src, dummy; - if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) { for (c = 0; c < 4; ++c) { if (!(tgsi.getDst(0).getMask() & (1 << c))) continue; @@ -2346,11 +2363,11 @@ Converter::handleSTORE() Value *off; if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, tgsi.getSrc(0).getValueU32(0, info) + 4 * c); } 
else { off = fetchSrc(0, 0); - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c); } Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); @@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector<Value *> defv; LValue *dst = getScratch(); - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { for (int c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) Value *off = fetchSrc(1, c), *off2 = NULL; Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(c, info)); else - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); if (tgsi.getSrc(0).isIndirect(0)) off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); if (subOp == NV50_IR_SUBOP_ATOM_CAS) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index e7cb54bc426..d181f1574f1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq) return true; } +void +NVC0LoweringPass::handleSharedATOM(Instruction *atom) +{ + assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); + + BasicBlock *currBB = atom->bb; + BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); + BasicBlock *joinBB = atom->bb->splitAfter(atom); + + bld.setPosition(currBB, true); + assert(!currBB->joinAt); + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); + + 
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); + currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); + + bld.setPosition(tryLockAndSetBB, true); + + Instruction *ld = + bld.mkLoad(TYPE_U32, atom->getDef(0), + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL); + ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); + ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; + + Value *stVal; + if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + // Read the old value, and write the new one. + stVal = atom->getSrc(1); + } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { + CmpInstruction *set = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, ld->getDef(0), atom->getSrc(1)); + set->setPredicate(CC_P, ld->getDef(1)); + + Instruction *selp = + bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0), + atom->getSrc(2), set->getDef(0)); + selp->src(2).mod = Modifier(NV50_IR_MOD_NOT); + selp->setPredicate(CC_P, ld->getDef(1)); + + stVal = selp->getDef(0); + } else { + operation op; + + switch (atom->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: + op = OP_ADD; + break; + case NV50_IR_SUBOP_ATOM_AND: + op = OP_AND; + break; + case NV50_IR_SUBOP_ATOM_OR: + op = OP_OR; + break; + case NV50_IR_SUBOP_ATOM_XOR: + op = OP_XOR; + break; + case NV50_IR_SUBOP_ATOM_MIN: + op = OP_MIN; + break; + case NV50_IR_SUBOP_ATOM_MAX: + op = OP_MAX; + break; + default: + assert(0); + } + + Instruction *i = + bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0), + atom->getSrc(1)); + i->setPredicate(CC_P, ld->getDef(1)); + + stVal = i->getDef(0); + } + + Instruction *st = + bld.mkStore(OP_STORE, TYPE_U32, + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), + NULL, stVal); + st->setPredicate(CC_P, ld->getDef(1)); + st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; + + // Loop until the lock is acquired. 
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); + tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); + tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS); + bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); + + bld.remove(atom); + + bld.setPosition(joinBB, false); + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; +} + bool NVC0LoweringPass::handleATOM(Instruction *atom) { @@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom) sv = SV_LBASE; break; case FILE_MEMORY_SHARED: - sv = SV_SBASE; - break; + handleSharedATOM(atom); + return true; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); @@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom) bool NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) { + if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { + // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). + return false; + } + if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) return false; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 09ec7e69ddc..6eb8aff3036 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -105,6 +105,7 @@ protected: bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); + void handleSharedATOM(Instruction *); void checkPredicate(Instruction *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 05b8db4a3d8..6192c0665e4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1539,6 +1539,7 @@ private: void 
handleCVT_CVT(Instruction *); void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); + void handleNEG(Instruction *); BuildUtil bld; }; @@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) return false; + if (src->getInsn()->saturate) + return false; + if (src->getInsn()->postFactor) return false; if (toOp == OP_SAD) { @@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn) insn->setSrc(0, add->getSrc(s)); } +// NEG(AND(SET, 1)) -> SET +void +AlgebraicOpt::handleNEG(Instruction *i) { + Instruction *src = i->getSrc(0)->getInsn(); + ImmediateValue imm; + int b; + + if (isFloatType(i->sType) || !src || src->op != OP_AND) + return; + + if (src->src(0).getImmediate(imm)) + b = 1; + else if (src->src(1).getImmediate(imm)) + b = 0; + else + return; + + if (!imm.isInteger(1)) + return; + + Instruction *set = src->getSrc(b)->getInsn(); + if ((set->op == OP_SET || set->op == OP_SET_AND || + set->op == OP_SET_OR || set->op == OP_SET_XOR) && + !isFloatType(set->dType)) { + i->def(0).replace(set->getDef(0), false); + } +} + bool AlgebraicOpt::visit(BasicBlock *bb) { @@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb) case OP_SUCLAMP: handleSUCLAMP(i); break; + case OP_NEG: + handleNEG(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 47285a25c33..85f77047c5c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -198,6 +198,11 @@ static const char *atomSubOpStr[] = "add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch" }; +static const char *ldstSubOpStr[] = +{ + "", "lock", "unlock" +}; + static const char *DataTypeStr[] = { "-", @@ -537,6 +542,11 @@ void Instruction::print() const if (subOp < Elements(atomSubOpStr)) PRINT("%s ", 
atomSubOpStr[subOp]); break; + case OP_LOAD: + case OP_STORE: + if (subOp < Elements(ldstSubOpStr)) + PRINT("%s ", ldstSubOpStr[subOp]); + break; default: if (subOp) PRINT("(SUBOP:%u) ", subOp); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index de39be872e4..d877c253a17 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -968,6 +968,7 @@ GCRA::coalesce(ArrayList& insns) case 0xf0: case 0x100: case 0x110: + case 0x120: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -2231,6 +2232,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) texConstraintNVE0(tex); break; case 0x110: + case 0x120: texConstraintGM107(tex); break; default: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index ae0a8bb61d1..89d3a08937f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -143,6 +143,7 @@ Target *Target::create(unsigned int chipset) STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { case 0x110: + case 0x120: return getTargetGM107(chipset); case 0xc0: case 0xd0: diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b62889119c5..5be7a3dab76 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -280,6 +280,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -324,6 +325,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, 
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h new file mode 100644 index 00000000000..5d40624bb9e --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h @@ -0,0 +1,279 @@ +#ifndef G80_DEFS_XML +#define G80_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission
notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define G80_VSTATUS_IDLE 0x00000000 +#define G80_VSTATUS_BUSY 0x00000001 +#define G80_VSTATUS_UNK2 0x00000002 +#define G80_VSTATUS_WAITING 0x00000003 +#define G80_VSTATUS_BLOCKED 0x00000005 +#define G80_VSTATUS_FAULTED 0x00000006 +#define G80_VSTATUS_PAUSED 0x00000007 +#define G80_TIC_SOURCE_ZERO 0x00000000 +#define G80_TIC_SOURCE_R 0x00000002 +#define G80_TIC_SOURCE_G 0x00000003 +#define G80_TIC_SOURCE_B 0x00000004 +#define G80_TIC_SOURCE_A 0x00000005 +#define G80_TIC_SOURCE_ONE_INT 0x00000006 +#define G80_TIC_SOURCE_ONE_FLOAT 0x00000007 +#define G80_TIC_TYPE_SNORM 0x00000001 +#define G80_TIC_TYPE_UNORM 0x00000002 +#define G80_TIC_TYPE_SINT 0x00000003 +#define G80_TIC_TYPE_UINT 0x00000004 +#define G80_TIC_TYPE_SNORM_FORCE_FP16 0x00000005 +#define G80_TIC_TYPE_UNORM_FORCE_FP16 0x00000006 +#define G80_TIC_TYPE_FLOAT 0x00000007 +#define G80_SURFACE_FORMAT_BITMAP 0x0000001c +#define G80_SURFACE_FORMAT_UNK1D 0x0000001d +#define G80_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 +#define G80_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 +#define G80_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 +#define G80_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 +#define G80_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 +#define G80_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 +#define G80_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 +#define G80_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 +#define G80_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 
+#define G80_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 +#define G80_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca +#define G80_SURFACE_FORMAT_RG32_FLOAT 0x000000cb +#define G80_SURFACE_FORMAT_RG32_SINT 0x000000cc +#define G80_SURFACE_FORMAT_RG32_UINT 0x000000cd +#define G80_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce +#define G80_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf +#define G80_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 +#define G80_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 +#define G80_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 +#define G80_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5 +#define G80_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 +#define G80_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 +#define G80_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 +#define G80_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 +#define G80_SURFACE_FORMAT_RG16_UNORM 0x000000da +#define G80_SURFACE_FORMAT_RG16_SNORM 0x000000db +#define G80_SURFACE_FORMAT_RG16_SINT 0x000000dc +#define G80_SURFACE_FORMAT_RG16_UINT 0x000000dd +#define G80_SURFACE_FORMAT_RG16_FLOAT 0x000000de +#define G80_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df +#define G80_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 +#define G80_SURFACE_FORMAT_R32_SINT 0x000000e3 +#define G80_SURFACE_FORMAT_R32_UINT 0x000000e4 +#define G80_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define G80_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 +#define G80_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 +#define G80_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 +#define G80_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 +#define G80_SURFACE_FORMAT_RG8_UNORM 0x000000ea +#define G80_SURFACE_FORMAT_RG8_SNORM 0x000000eb +#define G80_SURFACE_FORMAT_RG8_SINT 0x000000ec +#define G80_SURFACE_FORMAT_RG8_UINT 0x000000ed +#define G80_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define G80_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define G80_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define G80_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define G80_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define G80_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define G80_SURFACE_FORMAT_R8_SNORM 
0x000000f4 +#define G80_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define G80_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define G80_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 +#define G80_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 +#define G80_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc +#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd +#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe +#define G80_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff +#define G80_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define G80_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define G80_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 +#define G80_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 +#define G80_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 +#define G80_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 +#define G80_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 +#define G80_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d +#define G80_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e +#define G80_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f +#define GK104_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 +#define GK104_IMAGE_FORMAT_RGBA32_SINT 0x00000003 +#define GK104_IMAGE_FORMAT_RGBA32_UINT 0x00000004 +#define GK104_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 +#define GK104_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 +#define GK104_IMAGE_FORMAT_RGBA16_SINT 0x0000000a +#define GK104_IMAGE_FORMAT_RGBA16_UINT 0x0000000b +#define GK104_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c +#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x0000000d +#define GK104_IMAGE_FORMAT_RG32_SINT 0x0000000e +#define GK104_IMAGE_FORMAT_RG32_UINT 0x0000000f +#define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 +#define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 +#define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 +#define GK104_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a +#define GK104_IMAGE_FORMAT_RGBA8_SINT 0x0000001b +#define GK104_IMAGE_FORMAT_RGBA8_UINT 0x0000001c +#define 
GK104_IMAGE_FORMAT_RG16_UNORM 0x0000001d +#define GK104_IMAGE_FORMAT_RG16_SNORM 0x0000001e +#define GK104_IMAGE_FORMAT_RG16_SINT 0x0000001f +#define GK104_IMAGE_FORMAT_RG16_UINT 0x00000020 +#define GK104_IMAGE_FORMAT_RG16_FLOAT 0x00000021 +#define GK104_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 +#define GK104_IMAGE_FORMAT_R32_SINT 0x00000027 +#define GK104_IMAGE_FORMAT_R32_UINT 0x00000028 +#define GK104_IMAGE_FORMAT_R32_FLOAT 0x00000029 +#define GK104_IMAGE_FORMAT_RG8_UNORM 0x0000002e +#define GK104_IMAGE_FORMAT_RG8_SNORM 0x0000002f +#define GK104_IMAGE_FORMAT_RG8_SINT 0x00000030 +#define GK104_IMAGE_FORMAT_RG8_UINT 0x00000031 +#define GK104_IMAGE_FORMAT_R16_UNORM 0x00000032 +#define GK104_IMAGE_FORMAT_R16_SNORM 0x00000033 +#define GK104_IMAGE_FORMAT_R16_SINT 0x00000034 +#define GK104_IMAGE_FORMAT_R16_UINT 0x00000035 +#define GK104_IMAGE_FORMAT_R16_FLOAT 0x00000036 +#define GK104_IMAGE_FORMAT_R8_UNORM 0x00000037 +#define GK104_IMAGE_FORMAT_R8_SNORM 0x00000038 +#define GK104_IMAGE_FORMAT_R8_SINT 0x00000039 +#define GK104_IMAGE_FORMAT_R8_UINT 0x0000003a +#define G80_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003 +#define G80_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004 +#define G80_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005 +#define G80_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008 +#define G80_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009 +#define G80_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a +#define G80_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b +#define G80_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c +#define G80_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e +#define G80_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f +#define G80_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010 +#define G80_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA 0x00000012 +#define G80_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015 +#define 
G80_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_MSAA 0x00000017 +#define G80_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019 +#define G80_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a +#define G80_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b +#define G80_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c +#define G80_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e +#define G80_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f +#define G80_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021 +#define G80_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023 +#define G80_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024 +#define G80_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027 +#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028 +#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029 +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a +#define G80_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d +#define G80_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e +#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031 +#define G80_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033 +#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034 +#define G80_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045 +#define G80_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048 +#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051 +#define 
G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA_GF100 0x00000098 +#define G80_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5 +#define G80_CG_IDLE_TIMEOUT__MASK 0x0000003f +#define G80_CG_IDLE_TIMEOUT__SHIFT 0 +#define G80_CG_IDLE_TIMEOUT_ENABLE 0x00000040 +#define G80_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000 +#define G80_CG_INTERFACE_REENABLE_TIME__SHIFT 16 +#define G80_CG_THROTTLE_DUTY_M1__MASK 0x00f00000 +#define G80_CG_THROTTLE_DUTY_M1__SHIFT 20 +#define G80_CG_DELAY__MASK 0x0f000000 +#define G80_CG_DELAY__SHIFT 24 +#define G80_CG_CLOCK_THROTTLE_ENABLE 0x10000000 +#define G80_CG_THROTTLE_MODE__MASK 0x20000000 +#define G80_CG_THROTTLE_MODE__SHIFT 29 +#define G80_CG_THROTTLE_MODE_AUTO 0x00000000 +#define G80_CG_THROTTLE_MODE_MANUAL 0x20000000 +#define G80_CG_INTERFACE_THROTTLE_ENABLE 0x40000000 +#define G80_QUERY__SIZE 0x00000010 +#define G80_QUERY_COUNTER 0x00000000 + +#define G80_QUERY_RES 0x00000004 + +#define G80_QUERY_TIME 0x00000008 + + +#endif /* G80_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h new file mode 100644 index 00000000000..542963ca452 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h @@ -0,0 +1,451 @@ +#ifndef G80_TEXTURE_XML +#define G80_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- 
Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define G80_TSC_WRAP_WRAP 0x00000000 +#define G80_TSC_WRAP_MIRROR 0x00000001 +#define G80_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define G80_TSC_WRAP_BORDER 0x00000003 +#define G80_TSC_WRAP_CLAMP_OGL 0x00000004 +#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE 0x00000005 +#define G80_TSC_WRAP_MIRROR_ONCE_BORDER 0x00000006 +#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL 0x00000007 +#define G80_TIC__SIZE 0x00000020 +#define G80_TIC_0 0x00000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__MASK 0x80000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT 31 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_NO 0x00000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_YES 0x80000000 +#define G84_TIC_0_PACK_COMPONENTS 0x40000000 +#define G80_TIC_0_W_SOURCE__MASK 0x38000000 +#define G80_TIC_0_W_SOURCE__SHIFT 27 +#define G80_TIC_0_Z_SOURCE__MASK 0x07000000 +#define G80_TIC_0_Z_SOURCE__SHIFT 24 +#define G80_TIC_0_Y_SOURCE__MASK 0x00e00000 +#define G80_TIC_0_Y_SOURCE__SHIFT 21 +#define G80_TIC_0_X_SOURCE__MASK 0x001c0000 +#define G80_TIC_0_X_SOURCE__SHIFT 18 +#define G80_TIC_0_A_DATA_TYPE__MASK 0x00038000 +#define G80_TIC_0_A_DATA_TYPE__SHIFT 15 +#define G80_TIC_0_B_DATA_TYPE__MASK 0x00007000 +#define G80_TIC_0_B_DATA_TYPE__SHIFT 12 +#define G80_TIC_0_G_DATA_TYPE__MASK 0x00000e00 +#define G80_TIC_0_G_DATA_TYPE__SHIFT 9 +#define G80_TIC_0_R_DATA_TYPE__MASK 0x000001c0 +#define G80_TIC_0_R_DATA_TYPE__SHIFT 6 +#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f +#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0 +#define G80_TIC_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001 +#define GF100_TIC_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002 +#define G80_TIC_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003 +#define G80_TIC_0_COMPONENTS_SIZES_R32_G32 0x00000004 +#define G80_TIC_0_COMPONENTS_SIZES_R32_B24G8 0x00000005 +#define G80_TIC_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007 +#define G80_TIC_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008 +#define G80_TIC_0_COMPONENTS_SIZES_A2B10G10R10 
0x00000009 +#define G80_TIC_0_COMPONENTS_SIZES_R16_G16 0x0000000c +#define G80_TIC_0_COMPONENTS_SIZES_G8R24 0x0000000d +#define G80_TIC_0_COMPONENTS_SIZES_G24R8 0x0000000e +#define G80_TIC_0_COMPONENTS_SIZES_R32 0x0000000f +#define G80_TIC_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012 +#define G80_TIC_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013 +#define G80_TIC_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014 +#define G80_TIC_0_COMPONENTS_SIZES_B5G6R5 0x00000015 +#define G80_TIC_0_COMPONENTS_SIZES_B6G5R5 0x00000016 +#define G80_TIC_0_COMPONENTS_SIZES_G8R8 0x00000018 +#define G80_TIC_0_COMPONENTS_SIZES_R16 0x0000001b +#define G80_TIC_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c +#define G80_TIC_0_COMPONENTS_SIZES_R8 0x0000001d +#define G80_TIC_0_COMPONENTS_SIZES_G4R4 0x0000001e +#define G80_TIC_0_COMPONENTS_SIZES_R1 0x0000001f +#define G80_TIC_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020 +#define G80_TIC_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021 +#define G80_TIC_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022 +#define G80_TIC_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023 +#define G80_TIC_0_COMPONENTS_SIZES_DXT1 0x00000024 +#define G80_TIC_0_COMPONENTS_SIZES_DXT23 0x00000025 +#define G80_TIC_0_COMPONENTS_SIZES_DXT45 0x00000026 +#define G80_TIC_0_COMPONENTS_SIZES_DXN1 0x00000027 +#define G80_TIC_0_COMPONENTS_SIZES_DXN2 0x00000028 +#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010 +#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011 +#define GF100_TIC_0_COMPONENTS_SIZES_BC7U 0x00000017 +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006 +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b +#define GK20A_TIC_0_COMPONENTS_SIZES_EAC 0x00000019 +#define GK20A_TIC_0_COMPONENTS_SIZES_EACX2 0x0000001a +#define G80_TIC_0_COMPONENTS_SIZES_Z24S8 0x00000029 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24 0x0000002a +#define G80_TIC_0_COMPONENTS_SIZES_S8Z24 0x0000002b +#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c 
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d +#define G80_TIC_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e +#define G80_TIC_0_COMPONENTS_SIZES_ZF32 0x0000002f +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039 +#define G200_TIC_0_COMPONENTS_SIZES_Z16 0x0000003a +#define G200_TIC_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b +#define G200_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c +#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d +#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e +#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f +#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000000 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000010 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000001 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000011 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000002 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000015 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000012 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000004 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000016 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000017 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000013 +#define 
GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000005 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000014 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000006 + +#define G80_TIC_1 0x00000004 +#define G80_TIC_1_OFFSET_LOWER__MASK 0xffffffff +#define G80_TIC_1_OFFSET_LOWER__SHIFT 0 + +#define G80_TIC_2 0x00000008 +#define G80_TIC_2_OFFSET_UPPER__MASK 0x000000ff +#define G80_TIC_2_OFFSET_UPPER__SHIFT 0 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__MASK 0x00000300 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__SHIFT 8 +#define G80_TIC_2_SRGB_CONVERSION 0x00000400 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_MSB 0x00000800 +#define G80_TIC_2_LOD_ANISO_QUALITY_2 0x00001000 +#define G80_TIC_2_COLOR_KEY_OP 0x00002000 +#define G80_TIC_2_TEXTURE_TYPE__MASK 0x0003c000 +#define G80_TIC_2_TEXTURE_TYPE__SHIFT 14 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D 0x00000000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D 0x00004000 +#define G80_TIC_2_TEXTURE_TYPE_THREE_D 0x00008000 +#define G80_TIC_2_TEXTURE_TYPE_CUBEMAP 0x0000c000 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY 0x00010000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY 0x00014000 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER 0x00018000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x0001c000 +#define G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY 0x00020000 +#define G80_TIC_2_LAYOUT__MASK 0x00040000 +#define G80_TIC_2_LAYOUT__SHIFT 18 +#define G80_TIC_2_LAYOUT_BLOCKLINEAR 0x00000000 +#define G80_TIC_2_LAYOUT_PITCH 0x00040000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MASK 0x00380000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__SHIFT 19 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_TWO 0x00080000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_FOUR 0x00100000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00180000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00200000 +#define 
G80_TIC_2_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00280000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__MASK 0x01c00000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__SHIFT 22 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_TWO 0x00400000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00800000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00c00000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x01000000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x01400000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__MASK 0x0e000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__SHIFT 25 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_TWO 0x02000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_FOUR 0x04000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_EIGHT 0x06000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x08000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x0a000000 +#define G80_TIC_2_SECTOR_PROMOTION__MASK 0x30000000 +#define G80_TIC_2_SECTOR_PROMOTION__SHIFT 28 +#define G80_TIC_2_SECTOR_PROMOTION_NO_PROMOTION 0x00000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x10000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x20000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_4 0x30000000 +#define G80_TIC_2_BORDER_SOURCE__MASK 0x40000000 +#define G80_TIC_2_BORDER_SOURCE__SHIFT 30 +#define G80_TIC_2_BORDER_SOURCE_TEXTURE 0x00000000 +#define G80_TIC_2_BORDER_SOURCE_COLOR 0x40000000 +#define G80_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define G80_TIC_3 0x0000000c +#define G80_TIC_3_PITCH__MASK 0x000fffff +#define G80_TIC_3_PITCH__SHIFT 0 +#define G80_TIC_3_LOD_ANISO_QUALITY__MASK 0x00100000 +#define G80_TIC_3_LOD_ANISO_QUALITY__SHIFT 20 +#define G80_TIC_3_LOD_ANISO_QUALITY_LOW 0x00000000 +#define G80_TIC_3_LOD_ANISO_QUALITY_HIGH 0x00100000 +#define G80_TIC_3_LOD_ISO_QUALITY__MASK 0x00200000 +#define G80_TIC_3_LOD_ISO_QUALITY__SHIFT 21 +#define G80_TIC_3_LOD_ISO_QUALITY_LOW 
0x00000000 +#define G80_TIC_3_LOD_ISO_QUALITY_HIGH 0x00200000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00c00000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 22 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00400000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00800000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00c00000 +#define G80_TIC_3_ANISO_SPREAD_SCALE__MASK 0x1f000000 +#define G80_TIC_3_ANISO_SPREAD_SCALE__SHIFT 24 +#define G80_TIC_3_USE_HEADER_OPT_CONTROL 0x20000000 +#define G84_TIC_3_ANISO_CLAMP_AT_MAX_LOD 0x40000000 +#define G84_TIC_3_ANISO_POW2 0x80000000 + +#define G80_TIC_4 0x00000010 +#define G80_TIC_4_WIDTH__MASK 0x3fffffff +#define G80_TIC_4_WIDTH__SHIFT 0 +#define G80_TIC_4_DEPTH_TEXTURE 0x40000000 +#define G84_TIC_4_USE_TEXTURE_HEADER_V2 0x80000000 + +#define G80_TIC_5 0x00000014 +#define G80_TIC_5_MAP_MIP_LEVEL__MASK 0xf0000000 +#define G80_TIC_5_MAP_MIP_LEVEL__SHIFT 28 +#define G80_TIC_5_DEPTH__MASK 0x0fff0000 +#define G80_TIC_5_DEPTH__SHIFT 16 +#define G80_TIC_5_HEIGHT__MASK 0x0000ffff +#define G80_TIC_5_HEIGHT__SHIFT 0 + +#define G80_TIC_6 0x00000018 +#define G80_TIC_6_TRILIN_OPT__MASK 0x0000001f +#define G80_TIC_6_TRILIN_OPT__SHIFT 0 +#define G80_TIC_6_MIP_LOD_BIAS__MASK 0x0003ffe0 +#define G80_TIC_6_MIP_LOD_BIAS__SHIFT 5 +#define G80_TIC_6_MIP_LOD_BIAS__RADIX 0x00000008 +#define G80_TIC_6_ANISO_BIAS__MASK 0x00780000 +#define G80_TIC_6_ANISO_BIAS__SHIFT 19 +#define G80_TIC_6_ANISO_BIAS__RADIX 0x00000004 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000 +#define 
G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000 +#define G80_TIC_6_MAX_ANISOTROPY__MASK 0x38000000 +#define G80_TIC_6_MAX_ANISOTROPY__SHIFT 27 +#define G80_TIC_6_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define G80_TIC_6_MAX_ANISOTROPY_2_TO_1 0x08000000 +#define G80_TIC_6_MAX_ANISOTROPY_4_TO_1 0x10000000 +#define G80_TIC_6_MAX_ANISOTROPY_6_TO_1 0x18000000 +#define G80_TIC_6_MAX_ANISOTROPY_8_TO_1 0x20000000 +#define G80_TIC_6_MAX_ANISOTROPY_10_TO_1 0x28000000 +#define G80_TIC_6_MAX_ANISOTROPY_12_TO_1 0x30000000 +#define G80_TIC_6_MAX_ANISOTROPY_16_TO_1 0x38000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000 + +#define G80_TIC_7 0x0000001c +#define G80_TIC_7_COLOR_KEY_VALUE__MASK 0xffffffff +#define G80_TIC_7_COLOR_KEY_VALUE__SHIFT 0 + +#define G84_TIC_7 0x0000001c +#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f +#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0 +#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0 +#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4 +#define G84_TIC_7_HEIGHT_MSB 0x00000100 +#define G84_TIC_7_MULTI_SAMPLE_COUNT__MASK 0x0000f000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT__SHIFT 12 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_1X1 0x00000000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X1 0x00001000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2 0x00002000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2 0x00003000 +#define GT215_TIC_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00004000 +#define 
GT215_TIC_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00005000 +#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X4 0x00006000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00008000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00009000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x0000a000 +#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x0000b000 +#define G84_TIC_7_MIN_LOD_CLAMP__MASK 0x0fff0000 +#define G84_TIC_7_MIN_LOD_CLAMP__SHIFT 16 +#define G84_TIC_7_MIN_LOD_CLAMP__RADIX 0x00000008 +#define G84_TIC_7_DEPTH_MSB__MASK 0x70000000 +#define G84_TIC_7_DEPTH_MSB__SHIFT 28 + +#define G80_TSC__SIZE 0x00000020 +#define G80_TSC_0 0x00000000 +#define G80_TSC_0_ADDRESS_U__MASK 0x00000007 +#define G80_TSC_0_ADDRESS_U__SHIFT 0 +#define G80_TSC_0_ADDRESS_V__MASK 0x00000038 +#define G80_TSC_0_ADDRESS_V__SHIFT 3 +#define G80_TSC_0_ADDRESS_P__MASK 0x000001c0 +#define G80_TSC_0_ADDRESS_P__SHIFT 6 +#define G80_TSC_0_DEPTH_COMPARE 0x00000200 +#define G80_TSC_0_DEPTH_COMPARE_FUNC__MASK 0x00001c00 +#define G80_TSC_0_DEPTH_COMPARE_FUNC__SHIFT 10 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_NEVER 0x00000000 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_LESS 0x00000400 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_EQUAL 0x00000800 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_LEQUAL 0x00000c00 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_GREATER 0x00001000 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_NOTEQUAL 0x00001400 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_GEQUAL 0x00001800 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_ALWAYS 0x00001c00 +#define G80_TSC_0_SRGB_CONVERSION 0x00002000 +#define G80_TSC_0_FONT_FILTER_WIDTH__MASK 0x0001c000 +#define G80_TSC_0_FONT_FILTER_WIDTH__SHIFT 14 +#define G80_TSC_0_FONT_FILTER_HEIGHT__MASK 0x000e0000 +#define G80_TSC_0_FONT_FILTER_HEIGHT__SHIFT 17 +#define G80_TSC_0_MAX_ANISOTROPY__MASK 0x00700000 +#define G80_TSC_0_MAX_ANISOTROPY__SHIFT 20 +#define G80_TSC_0_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define G80_TSC_0_MAX_ANISOTROPY_2_TO_1 0x00100000 +#define G80_TSC_0_MAX_ANISOTROPY_4_TO_1 0x00200000 +#define 
G80_TSC_0_MAX_ANISOTROPY_6_TO_1 0x00300000 +#define G80_TSC_0_MAX_ANISOTROPY_8_TO_1 0x00400000 +#define G80_TSC_0_MAX_ANISOTROPY_10_TO_1 0x00500000 +#define G80_TSC_0_MAX_ANISOTROPY_12_TO_1 0x00600000 +#define G80_TSC_0_MAX_ANISOTROPY_16_TO_1 0x00700000 + +#define G80_TSC_1 0x00000004 +#define G80_TSC_1_MAG_FILTER__MASK 0x00000003 +#define G80_TSC_1_MAG_FILTER__SHIFT 0 +#define G80_TSC_1_MAG_FILTER_NEAREST 0x00000001 +#define G80_TSC_1_MAG_FILTER_LINEAR 0x00000002 +#define G80_TSC_1_MIN_FILTER__MASK 0x00000030 +#define G80_TSC_1_MIN_FILTER__SHIFT 4 +#define G80_TSC_1_MIN_FILTER_NEAREST 0x00000010 +#define G80_TSC_1_MIN_FILTER_LINEAR 0x00000020 +#define G80_TSC_1_MIP_FILTER__MASK 0x000000c0 +#define G80_TSC_1_MIP_FILTER__SHIFT 6 +#define G80_TSC_1_MIP_FILTER_NONE 0x00000040 +#define G80_TSC_1_MIP_FILTER_NEAREST 0x00000080 +#define G80_TSC_1_MIP_FILTER_LINEAR 0x000000c0 +#define GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING 0x00000200 +#define G80_TSC_1_MIP_LOD_BIAS__MASK 0x01fff000 +#define G80_TSC_1_MIP_LOD_BIAS__SHIFT 12 +#define G80_TSC_1_MIP_LOD_BIAS__RADIX 0x00000008 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__MASK 0x02000000 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__SHIFT 25 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_USE_HEADER_SETTING 0x00000000 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS 0x02000000 +#define G80_TSC_1_TRILIN_OPT__MASK 0x7c000000 +#define G80_TSC_1_TRILIN_OPT__SHIFT 26 + +#define G80_TSC_2 0x00000008 +#define G80_TSC_2_MIN_LOD_CLAMP__MASK 0x00000fff +#define G80_TSC_2_MIN_LOD_CLAMP__SHIFT 0 +#define G80_TSC_2_MIN_LOD_CLAMP__RADIX 0x00000008 +#define G80_TSC_2_MAX_LOD_CLAMP__MASK 0x00fff000 +#define G80_TSC_2_MAX_LOD_CLAMP__SHIFT 12 +#define G80_TSC_2_MAX_LOD_CLAMP__RADIX 0x00000008 +#define G80_TSC_2_SRGB_BORDER_COLOR_R__MASK 0xff000000 +#define G80_TSC_2_SRGB_BORDER_COLOR_R__SHIFT 24 + +#define G80_TSC_3 0x0000000c +#define G80_TSC_3_SRGB_BORDER_COLOR_G__MASK 0x000ff000 +#define 
G80_TSC_3_SRGB_BORDER_COLOR_G__SHIFT 12 +#define G80_TSC_3_SRGB_BORDER_COLOR_B__MASK 0x0ff00000 +#define G80_TSC_3_SRGB_BORDER_COLOR_B__SHIFT 20 + +#define G80_TSC_4 0x00000010 +#define G80_TSC_4_BORDER_COLOR_R__MASK 0xffffffff +#define G80_TSC_4_BORDER_COLOR_R__SHIFT 0 + +#define G80_TSC_5 0x00000014 +#define G80_TSC_5_BORDER_COLOR_G__MASK 0xffffffff +#define G80_TSC_5_BORDER_COLOR_G__SHIFT 0 + +#define G80_TSC_6 0x00000018 +#define G80_TSC_6_BORDER_COLOR_B__MASK 0xffffffff +#define G80_TSC_6_BORDER_COLOR_B__SHIFT 0 + +#define G80_TSC_7 0x0000001c +#define G80_TSC_7_BORDER_COLOR_A__MASK 0xffffffff +#define G80_TSC_7_BORDER_COLOR_A__SHIFT 0 + + +#endif /* G80_TEXTURE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 6d23fd66945..04488d6d0a6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) } void -nv50_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, const void *input) +nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + unsigned block_size = info->block[0] * info->block[1] * info->block[2]; struct nv50_program *cp = nv50->compprog; bool ret; @@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe, return; } - nv50_compute_upload_input(nv50, input); + nv50_compute_upload_input(nv50, info->input); BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); - PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 
0x10, 0x40)); @@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe, /* grid/block setup */ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); - PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); - PUSH_DATA (push, block_layout[2]); + PUSH_DATA (push, info->block[1] << 16 | info->block[0]); + PUSH_DATA (push, info->block[2]); BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); PUSH_DATA (push, 1 << 16 | block_size); BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); - PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]); BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 342ec96d62c..2620d03b999 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -153,6 +153,7 @@ struct nv50_context { uint32_t textures_coherent[3]; struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; + bool seamless_cube_map; uint8_t num_so_targets; uint8_t so_targets_dirty; @@ -322,7 +323,6 @@ nv98_video_buffer_create(struct pipe_context *pipe, /* nv50_compute.c */ void -nv50_launch_grid(struct pipe_context *, const uint *, const uint *, - uint32_t, const void *); +nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h deleted file mode 100644 index aad2a851691..00000000000 --- a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h +++ /dev/null @@ -1,263 +0,0 @@ -#ifndef NV50_DEFS_XML -#define NV50_DEFS_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/envytools/envytools/ -git clone https://github.com/envytools/envytools.git - -The rules-ng-ng source files this header was generated from are: -- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11) -- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49) -- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02) - -Copyright (C) 2006-2014 by the following authors: -- Artur Huillet <[email protected]> (ahuillet) -- Ben Skeggs (darktama, darktama_) -- B. R. <[email protected]> (koala_br) -- Carlos Martin <[email protected]> (carlosmn) -- Christoph Bumiller <[email protected]> (calim, chrisbmr) -- Dawid Gajownik <[email protected]> (gajownik) -- Dmitry Baryshkov -- Dmitry Eremin-Solenikov <[email protected]> (lumag) -- EdB <[email protected]> (edb_) -- Erik Waling <[email protected]> (erikwaling) -- Francisco Jerez <[email protected]> (curro) -- imirkin <[email protected]> (imirkin) -- jb17bsome <[email protected]> (jb17bsome) -- Jeremy Kolb <[email protected]> (kjeremy) -- Laurent Carlier <[email protected]> (lordheavy) -- Luca Barbieri <[email protected]> (lb, lb1) -- Maarten Maathuis <[email protected]> (stillunknown) -- Marcin Kościelnicki <[email protected]> (mwk, koriakin) -- Mark Carey <[email protected]> (careym) -- Matthieu Castet <[email protected]> (mat-c) -- nvidiaman <[email protected]> (nvidiaman) -- Patrice Mandin <[email protected]> (pmandin, pmdata) -- Pekka Paalanen <[email protected]> (pq, ppaalanen) -- Peter Popov <[email protected]> (ironpeter) -- Richard Hughes <[email protected]> (hughsient) -- Rudi Cilibrasi <[email protected]> (cilibrar) -- Serge Martin -- Simon Raffeiner -- Stephane Loeuillet <[email protected]> (leroutier) -- Stephane Marchesin <[email protected]> (marcheu) -- sturmflut <[email protected]> (sturmflut) -- Sylvain Munaut <[email protected]> -- Victor Stinner <[email protected]> (haypo) -- Wladmir van der
Laan <[email protected]> (miathan6) -- Younes Manton <[email protected]> (ymanton) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - - -#define NV50_VSTATUS_IDLE 0x00000000 -#define NV50_VSTATUS_BUSY 0x00000001 -#define NV50_VSTATUS_UNK2 0x00000002 -#define NV50_VSTATUS_WAITING 0x00000003 -#define NV50_VSTATUS_BLOCKED 0x00000005 -#define NV50_VSTATUS_FAULTED 0x00000006 -#define NV50_VSTATUS_PAUSED 0x00000007 -#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c -#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d -#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 -#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 -#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 -#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 -#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 -#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 -#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 -#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 -#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 -#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 -#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca -#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb -#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc -#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd -#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce -#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf -#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 -#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 -#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 -#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5 -#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 -#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 -#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 -#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 -#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da -#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db -#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc -#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd -#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de -#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df -#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 
-#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3 -#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4 -#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 -#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 -#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 -#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 -#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea -#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb -#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec -#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed -#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee -#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef -#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 -#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 -#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 -#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 -#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 -#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 -#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 -#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 -#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc -#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd -#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe -#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff -#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a -#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 -#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 -#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 -#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 -#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 -#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 -#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d -#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e -#define 
NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f -#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 -#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003 -#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004 -#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 -#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 -#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a -#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b -#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c -#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d -#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e -#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f -#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 -#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 -#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 -#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a -#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b -#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c -#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d -#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e -#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f -#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020 -#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021 -#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 -#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027 -#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028 -#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029 -#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e -#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f -#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030 -#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031 -#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032 -#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033 -#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034 -#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035 -#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036 -#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037 -#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038 -#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039 -#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a -#define 
NV50_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003 -#define NV50_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004 -#define NV50_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005 -#define NV50_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008 -#define NV50_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009 -#define NV50_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a -#define NV50_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b -#define NV50_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c -#define NV50_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e -#define NV50_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f -#define NV50_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010 -#define NV50_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA 0x00000012 -#define NV50_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015 -#define NV50_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_MSAA 0x00000017 -#define NV50_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019 -#define NV50_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a -#define NV50_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b -#define NV50_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c -#define NV50_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e -#define NV50_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f -#define NV50_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021 -#define NV50_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023 -#define NV50_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024 -#define NV50_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027 -#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028 -#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029 -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a -#define NV50_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 
0x0000002b -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d -#define NV50_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e -#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034 -#define NV50_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045 -#define NV50_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048 -#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051 -#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA_GF100 0x00000098 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5 -#define NV50_CG_IDLE_TIMEOUT__MASK 0x0000003f -#define NV50_CG_IDLE_TIMEOUT__SHIFT 0 -#define NV50_CG_IDLE_TIMEOUT_ENABLE 0x00000040 -#define NV50_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000 -#define NV50_CG_INTERFACE_REENABLE_TIME__SHIFT 16 -#define NV50_CG_THROTTLE_DUTY_M1__MASK 0x00f00000 -#define NV50_CG_THROTTLE_DUTY_M1__SHIFT 20 -#define NV50_CG_DELAY__MASK 0x0f000000 -#define NV50_CG_DELAY__SHIFT 24 -#define NV50_CG_CLOCK_THROTTLE_ENABLE 0x10000000 -#define NV50_CG_THROTTLE_MODE__MASK 0x20000000 -#define NV50_CG_THROTTLE_MODE__SHIFT 29 -#define NV50_CG_THROTTLE_MODE_AUTO 0x00000000 -#define NV50_CG_THROTTLE_MODE_MANUAL 0x20000000 -#define NV50_CG_INTERFACE_THROTTLE_ENABLE 0x40000000 -#define NV50_QUERY__SIZE 0x00000010 -#define NV50_QUERY_COUNTER 0x00000000 - -#define NV50_QUERY_RES 0x00000004 - -#define NV50_QUERY_TIME 0x00000008 - - -#endif /* 
NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 49a93bf1d91..717067cf2f7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -27,8 +27,8 @@ # include "nv50/nv50_screen.h" # include "nv50/nv50_3d.xml.h" #endif -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_texture.xml.h" +#include "nv50/g80_defs.xml.h" #include "pipe/p_defines.h" @@ -39,10 +39,8 @@ * C: render target (color), blendable only on nvc0 * D: scanout/display target, blendable * Z: depth/stencil - * V: vertex fetch * I: image / surface, implies T */ -#define U_V PIPE_BIND_VERTEX_BUFFER #define U_T PIPE_BIND_SAMPLER_VIEW #define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE #define U_TR PIPE_BIND_RENDER_TARGET | U_T @@ -51,38 +49,273 @@ #define U_IB PIPE_BIND_BLENDABLE | U_IR #define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB #define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T -#define U_TV U_V | U_T -#define U_TRV U_V | U_TR -#define U_IRV U_V | U_IR -#define U_TBV U_V | U_TB -#define U_IBV U_V | U_IB -#define U_TDV U_V | U_TD #if NOUVEAU_DRIVER == 0xc0 # define U_TC U_TB # define U_IC U_IB -# define U_TCV U_TBV -# define U_ICV U_IBV # define U_t U_T -# define U_tV U_TV #else # define U_TC U_TR # define U_IC U_IR -# define U_TCV U_TRV -# define U_ICV U_IRV # define U_t 0 -# define U_tV U_V #endif -#define NV50_ZETA_FORMAT_NONE 0 -#define NV50_SURFACE_FORMAT_NONE 0 +#define G80_ZETA_FORMAT_NONE 0 +#define G80_SURFACE_FORMAT_NONE 0 -/* for vertex buffers: */ -#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 -#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 -#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32 -#define NV50_TIC_0_FMT_BPTC NVC0_TIC_0_FMT_BPTC -#define NV50_TIC_0_FMT_BPTC_FLOAT NVC0_TIC_0_FMT_BPTC_FLOAT -#define NV50_TIC_0_FMT_BPTC_UFLOAT 
NVC0_TIC_0_FMT_BPTC_UFLOAT +#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz +#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz +#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz +#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ + [PIPE_FORMAT_##pf] = { \ + sf, { \ + SF_##c(sz), \ + G80_TIC_TYPE_##t0, \ + G80_TIC_TYPE_##t1, \ + G80_TIC_TYPE_##t2, \ + G80_TIC_TYPE_##t3, \ + G80_TIC_SOURCE_##r, \ + G80_TIC_SOURCE_##g, \ + G80_TIC_SOURCE_##b, \ + G80_TIC_SOURCE_##a, \ + }, U_##u \ + } + +#define C4(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) + +#define ZX(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define ZS(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define SZ(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) +#define SX(c, p, r, s, u) \ + SF(c, p, G80_ZETA_FORMAT_NONE, \ + r, r, r, r, UINT, UINT, UINT, UINT, s, u) + +#define F3(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, b, ONE_FLOAT, t, s, u) +#define I3(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, b, ONE_INT, t, s, u) + +#define F2(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, ZERO, ONE_FLOAT, t, s, u) +#define I2(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, ZERO, ONE_INT, t, s, u) + +#define F1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) +#define I1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, ZERO, ZERO, ONE_INT, t, s, u) + +#define A1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, ZERO, ZERO, ZERO, a, t, s, u) + +#if NOUVEAU_DRIVER == 0xc0 +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +#else +const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +#endif +{ + C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD), + F3(A, 
B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD), + C4(A, B8G8R8A8_SRGB, BGRA8_SRGB, B, G, R, A, UNORM, A8B8G8R8, TD), + F3(A, B8G8R8X8_SRGB, BGRX8_SRGB, B, G, R, xx, UNORM, A8B8G8R8, TD), + C4(A, R8G8B8A8_UNORM, RGBA8_UNORM, R, G, B, A, UNORM, A8B8G8R8, IB), + F3(A, R8G8B8X8_UNORM, RGBX8_UNORM, R, G, B, xx, UNORM, A8B8G8R8, TB), + C4(A, R8G8B8A8_SRGB, RGBA8_SRGB, R, G, B, A, UNORM, A8B8G8R8, TB), + F3(A, R8G8B8X8_SRGB, RGBX8_SRGB, R, G, B, xx, UNORM, A8B8G8R8, TB), + + ZX(B, Z16_UNORM, Z16_UNORM, R, R, R, xx, UNORM, Z16, TZ), + ZX(A, Z32_FLOAT, Z32_FLOAT, R, R, R, xx, FLOAT, ZF32, TZ), + ZX(A, Z24X8_UNORM, Z24_X8_UNORM, R, R, R, xx, UNORM, X8Z24, TZ), + SZ(A, X8Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ), + ZS(A, Z24_UNORM_S8_UINT, Z24_S8_UNORM, R, R, R, xx, UNORM, S8Z24, TZ), + SZ(A, S8_UINT_Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ), + ZS(A, Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, R, R, R, xx, FLOAT, ZF32_X24S8, TZ), + + SX(A, S8_UINT, R, R8, T), + SX(A, X24S8_UINT, G, S8Z24, T), + SX(A, S8X24_UINT, R, Z24S8, T), + SX(A, X32_S8X24_UINT, G, ZF32_X24S8, T), + + F3(A, B5G6R5_UNORM, B5G6R5_UNORM, B, G, R, xx, UNORM, B5G6R5, TD), + C4(A, B5G5R5A1_UNORM, BGR5_A1_UNORM, B, G, R, A, UNORM, A1B5G5R5, TD), + F3(A, B5G5R5X1_UNORM, BGR5_X1_UNORM, B, G, R, xx, UNORM, A1B5G5R5, TD), + C4(A, B4G4R4A4_UNORM, NONE, B, G, R, A, UNORM, A4B4G4R4, T), + F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T), + F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T), + + C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, IB), + C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TD), + C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T), + C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T), + C4(A, R10G10B10A2_UINT, RGB10_A2_UINT, R, G, B, A, UINT, A2B10G10R10, TR), + C4(A, B10G10R10A2_UINT, RGB10_A2_UINT, B, G, R, A, UINT, A2B10G10R10, T), + + F3(A, 
R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB), + + F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB), + F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB), + F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC), + I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR), + I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR), + F3(A, L16_UNORM, R16_UNORM, R, R, R, xx, UNORM, R16, TC), + F3(A, L16_SNORM, R16_SNORM, R, R, R, xx, SNORM, R16, TC), + F3(A, L16_FLOAT, R16_FLOAT, R, R, R, xx, FLOAT, R16, TB), + I3(A, L16_SINT, R16_SINT, R, R, R, xx, SINT, R16, TR), + I3(A, L16_UINT, R16_UINT, R, R, R, xx, UINT, R16, TR), + F3(A, L32_FLOAT, R32_FLOAT, R, R, R, xx, FLOAT, R32, TB), + I3(A, L32_SINT, R32_SINT, R, R, R, xx, SINT, R32, TR), + I3(A, L32_UINT, R32_UINT, R, R, R, xx, UINT, R32, TR), + + C4(A, I8_UNORM, R8_UNORM, R, R, R, R, UNORM, R8, TR), + C4(A, I8_SNORM, R8_SNORM, R, R, R, R, SNORM, R8, TR), + C4(A, I8_SINT, R8_SINT, R, R, R, R, SINT, R8, TR), + C4(A, I8_UINT, R8_UINT, R, R, R, R, UINT, R8, TR), + C4(A, I16_UNORM, R16_UNORM, R, R, R, R, UNORM, R16, TR), + C4(A, I16_SNORM, R16_SNORM, R, R, R, R, SNORM, R16, TR), + C4(A, I16_FLOAT, R16_FLOAT, R, R, R, R, FLOAT, R16, TR), + C4(A, I16_SINT, R16_SINT, R, R, R, R, SINT, R16, TR), + C4(A, I16_UINT, R16_UINT, R, R, R, R, UINT, R16, TR), + C4(A, I32_FLOAT, R32_FLOAT, R, R, R, R, FLOAT, R32, TR), + C4(A, I32_SINT, R32_SINT, R, R, R, R, SINT, R32, TR), + C4(A, I32_UINT, R32_UINT, R, R, R, R, UINT, R32, TR), + + A1(A, A8_UNORM, A8_UNORM, xx, xx, xx, R, UNORM, R8, TB), + A1(A, A8_SNORM, R8_SNORM, xx, xx, xx, R, SNORM, R8, T), + A1(A, A8_SINT, R8_SINT, xx, xx, xx, R, SINT, R8, T), + A1(A, A8_UINT, R8_UINT, xx, xx, xx, R, UINT, R8, T), + A1(A, A16_UNORM, R16_UNORM, xx, xx, xx, R, UNORM, R16, T), + A1(A, A16_SNORM, R16_SNORM, xx, xx, xx, R, SNORM, R16, T), + A1(A, A16_FLOAT, R16_FLOAT, xx, xx, xx, R, FLOAT, R16, T), + A1(A, A16_SINT, R16_SINT, xx, xx, xx, R, SINT, R16, T), + A1(A, A16_UINT, R16_UINT, 
xx, xx, xx, R, UINT, R16, T), + A1(A, A32_FLOAT, R32_FLOAT, xx, xx, xx, R, FLOAT, R32, T), + A1(A, A32_SINT, R32_SINT, xx, xx, xx, R, SINT, R32, T), + A1(A, A32_UINT, R32_UINT, xx, xx, xx, R, UINT, R32, T), + + C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T), + C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), + C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T), + C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), + C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T), + C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T), + C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T), + C4(A, L16A16_SNORM, RG16_SNORM, R, R, R, G, SNORM, R16_G16, T), + C4(A, L16A16_FLOAT, RG16_FLOAT, R, R, R, G, FLOAT, R16_G16, T), + C4(A, L16A16_SINT, RG16_SINT, R, R, R, G, SINT, R16_G16, T), + C4(A, L16A16_UINT, RG16_UINT, R, R, R, G, UINT, R16_G16, T), + C4(A, L32A32_FLOAT, RG32_FLOAT, R, R, R, G, FLOAT, R32_G32, T), + C4(A, L32A32_SINT, RG32_SINT, R, R, R, G, SINT, R32_G32, T), + C4(A, L32A32_UINT, RG32_UINT, R, R, R, G, UINT, R32_G32, T), + + F3(A, DXT1_RGB, NONE, R, G, B, xx, UNORM, DXT1, T), + F3(A, DXT1_SRGB, NONE, R, G, B, xx, UNORM, DXT1, T), + C4(A, DXT1_RGBA, NONE, R, G, B, A, UNORM, DXT1, T), + C4(A, DXT1_SRGBA, NONE, R, G, B, A, UNORM, DXT1, T), + C4(A, DXT3_RGBA, NONE, R, G, B, A, UNORM, DXT23, T), + C4(A, DXT3_SRGBA, NONE, R, G, B, A, UNORM, DXT23, T), + C4(A, DXT5_RGBA, NONE, R, G, B, A, UNORM, DXT45, T), + C4(A, DXT5_SRGBA, NONE, R, G, B, A, UNORM, DXT45, T), + + F1(A, RGTC1_UNORM, NONE, R, xx, xx, xx, UNORM, DXN1, T), + F1(A, RGTC1_SNORM, NONE, R, xx, xx, xx, SNORM, DXN1, T), + F2(A, RGTC2_UNORM, NONE, R, G, xx, xx, UNORM, DXN2, T), + F2(A, RGTC2_SNORM, NONE, R, G, xx, xx, SNORM, DXN2, T), + F3(A, LATC1_UNORM, NONE, R, R, R, xx, UNORM, DXN1, T), + F3(A, LATC1_SNORM, NONE, R, R, R, xx, SNORM, DXN1, T), + C4(A, LATC2_UNORM, NONE, R, R, R, G, UNORM, DXN2, T), + C4(A, LATC2_SNORM, NONE, R, R, R, G, SNORM, DXN2, T), + + C4(C, 
BPTC_RGBA_UNORM, NONE, R, G, B, A, UNORM, BC7U, t), + C4(C, BPTC_SRGBA, NONE, R, G, B, A, UNORM, BC7U, t), + F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t), + F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t), + + C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB), + C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T), + C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T), + C4(A, R32G32B32A32_SINT, RGBA32_SINT, R, G, B, A, SINT, R32_G32_B32_A32, IR), + C4(A, R32G32B32A32_UINT, RGBA32_UINT, R, G, B, A, UINT, R32_G32_B32_A32, IR), + F3(A, R32G32B32X32_FLOAT, RGBX32_FLOAT, R, G, B, xx, FLOAT, R32_G32_B32_A32, TB), + I3(A, R32G32B32X32_SINT, RGBX32_SINT, R, G, B, xx, SINT, R32_G32_B32_A32, TR), + I3(A, R32G32B32X32_UINT, RGBX32_UINT, R, G, B, xx, UINT, R32_G32_B32_A32, TR), + + F3(C, R32G32B32_FLOAT, NONE, R, G, B, xx, FLOAT, R32_G32_B32, t), + I3(C, R32G32B32_SINT, NONE, R, G, B, xx, SINT, R32_G32_B32, t), + I3(C, R32G32B32_UINT, NONE, R, G, B, xx, UINT, R32_G32_B32, t), + + F2(A, R32G32_FLOAT, RG32_FLOAT, R, G, xx, xx, FLOAT, R32_G32, IB), + F2(A, R32G32_UNORM, NONE, R, G, xx, xx, UNORM, R32_G32, T), + F2(A, R32G32_SNORM, NONE, R, G, xx, xx, SNORM, R32_G32, T), + I2(A, R32G32_SINT, RG32_SINT, R, G, xx, xx, SINT, R32_G32, IR), + I2(A, R32G32_UINT, RG32_UINT, R, G, xx, xx, UINT, R32_G32, IR), + + F1(A, R32_FLOAT, R32_FLOAT, R, xx, xx, xx, FLOAT, R32, IB), + F1(A, R32_UNORM, NONE, R, xx, xx, xx, UNORM, R32, T), + F1(A, R32_SNORM, NONE, R, xx, xx, xx, SNORM, R32, T), + I1(A, R32_SINT, R32_SINT, R, xx, xx, xx, SINT, R32, IR), + I1(A, R32_UINT, R32_UINT, R, xx, xx, xx, UINT, R32, IR), + + C4(A, R16G16B16A16_FLOAT, RGBA16_FLOAT, R, G, B, A, FLOAT, R16_G16_B16_A16, IB), + C4(A, R16G16B16A16_UNORM, RGBA16_UNORM, R, G, B, A, UNORM, R16_G16_B16_A16, IC), + C4(A, R16G16B16A16_SNORM, RGBA16_SNORM, R, G, B, A, SNORM, R16_G16_B16_A16, IC), + C4(A, R16G16B16A16_SINT, RGBA16_SINT, R, G, B, 
A, SINT, R16_G16_B16_A16, IR), + C4(A, R16G16B16A16_UINT, RGBA16_UINT, R, G, B, A, UINT, R16_G16_B16_A16, IR), + F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB), + F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T), + F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T), + I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T), + I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T), + + F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB), + F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC), + F2(A, R16G16_SNORM, RG16_SNORM, R, G, xx, xx, SNORM, R16_G16, IC), + I2(A, R16G16_SINT, RG16_SINT, R, G, xx, xx, SINT, R16_G16, IR), + I2(A, R16G16_UINT, RG16_UINT, R, G, xx, xx, UINT, R16_G16, IR), + + F1(A, R16_FLOAT, R16_FLOAT, R, xx, xx, xx, FLOAT, R16, IB), + F1(A, R16_UNORM, R16_UNORM, R, xx, xx, xx, UNORM, R16, IC), + F1(A, R16_SNORM, R16_SNORM, R, xx, xx, xx, SNORM, R16, IC), + I1(A, R16_SINT, R16_SINT, R, xx, xx, xx, SINT, R16, IR), + I1(A, R16_UINT, R16_UINT, R, xx, xx, xx, UINT, R16, IR), + + C4(A, R8G8B8A8_SNORM, RGBA8_SNORM, R, G, B, A, SNORM, A8B8G8R8, IC), + C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR), + C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR), + F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T), + I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T), + I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T), + + F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB), + F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC), + I2(A, R8G8_SINT, RG8_SINT, R, G, xx, xx, SINT, G8R8, IR), + I2(A, R8G8_UINT, RG8_UINT, R, G, xx, xx, UINT, G8R8, IR), + + F1(A, R8_UNORM, R8_UNORM, R, xx, xx, xx, UNORM, R8, IB), + F1(A, R8_SNORM, R8_SNORM, R, xx, xx, xx, SNORM, R8, IC), + I1(A, R8_SINT, R8_SINT, R, xx, xx, xx, SINT, 
R8, IR), + I1(A, R8_UINT, R8_UINT, R, xx, xx, xx, UINT, R8, IR), + + F3(A, R8G8_B8G8_UNORM, NONE, R, G, B, xx, UNORM, G8B8G8R8, T), + F3(A, G8R8_B8R8_UNORM, NONE, G, R, B, xx, UNORM, G8B8G8R8, T), + F3(A, G8R8_G8B8_UNORM, NONE, R, G, B, xx, UNORM, B8G8R8G8, T), + F3(A, R8G8_R8B8_UNORM, NONE, G, R, B, xx, UNORM, B8G8R8G8, T), + + F1(A, R1_UNORM, BITMAP, R, xx, xx, xx, UNORM, R1, T), + + C4(A, R4A4_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G4R4, T), + C4(A, R8A8_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G8R8, T), + C4(A, A4R4_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G4R4, T), + C4(A, A8R8_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G8R8, T), + + SF(A, R8SG8SB8UX8U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, A8B8G8R8, T), + SF(A, R5SG5SB6U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, B6G5R5, T), +}; #if NOUVEAU_DRIVER == 0xc0 # define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s @@ -92,353 +325,138 @@ # define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t #endif -#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \ - [PIPE_FORMAT_##pf] = { \ - sf, \ - (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ - (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ - (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ - (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ - (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ - (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ - (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ - (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ - NV50_TIC_0_FMT_##sz, \ - NVXX_3D_VAF_SIZE(sz) | \ - NVXX_3D_VAF_TYPE(t0) | (br << 31), \ - U_##u \ - } - -#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ +#define VF_x(pf, type, size, bgra) 0 +#define VF_A(pf, type, size, bgra) \ + NVXX_3D_VAF_SIZE(size) | NVXX_3D_VAF_TYPE(type) | (bgra << 31) +#define VF(c, pf, type, size, bgra) \ [PIPE_FORMAT_##pf] = { \ - sf, \ - (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ - (NV50_TIC_MAP_##g << 
NV50_TIC_0_MAPG__SHIFT) | \ - (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ - (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ - (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ - (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ - (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ - (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ - NV50_TIC_0_FMT_##sz, 0, U_##u \ + VF_##c(pf, type, size, bgra), \ + PIPE_BIND_VERTEX_BUFFER \ } -#define C4A(p, n, r, g, b, a, t, s, u, br) \ - TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br) -#define C4B(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) - -#define ZXB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) -#define ZSB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) -#define SZB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) -#define SXB(p, r, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_NONE, \ - r, r, r, r, UINT, UINT, UINT, UINT, s, u) - -#define F3A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0) -#define I3A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, b, ONE_INT, t, s, u, 0) -#define F3B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, b, ONE_FLOAT, t, s, u) -#define I3B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, b, ONE_INT, t, s, u) - -#define F2A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0) -#define I2A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0) -#define F2B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u) -#define I2B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, ZERO, ONE_INT, t, s, u) - -#define F1A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0) -#define I1A(p, n, r, g, b, a, t, s, u) \ 
- C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0) -#define F1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) -#define I1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u) - -#define A1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u) - #if NOUVEAU_DRIVER == 0xc0 -const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +const struct nvc0_vertex_format nvc0_vertex_format[PIPE_FORMAT_COUNT] = #else -const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +const struct nv50_vertex_format nv50_vertex_format[PIPE_FORMAT_COUNT] = #endif { - C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1), - F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), - C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1), - F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), - C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0), - F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), - C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0), - F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), - - ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ), - ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ), - ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ), - SZB(X8Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), - ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ), - SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), - ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT, - Z32_S8_X24, TZ), - - SXB(S8_UINT, C0, 8, T), - SXB(X24S8_UINT, C1, Z24_S8, T), - SXB(S8X24_UINT, C0, S8_Z24, T), - SXB(X32_S8X24_UINT, C1, Z32_S8_X24, T), - - F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD), - C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD), - 
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD), - C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T), - F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T), - F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T), - - C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2, - IBV, 0), - C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2, - TDV, 1), - C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0), - C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1), - C4A(R10G10B10A2_UINT, RGB10_A2_UINT, C0, C1, C2, C3, UINT, 10_10_10_2, TRV, 0), - C4A(B10G10R10A2_UINT, RGB10_A2_UINT, C2, C1, C0, C3, UINT, 10_10_10_2, TV, 0), - - F3A(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IBV), - - F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), - F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), - F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC), - I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR), - I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR), - F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC), - F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC), - F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB), - I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR), - I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR), - F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB), - I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR), - I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR), - - C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR), - C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR), - C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR), - C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR), - C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR), - C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR), - C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR), - 
C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR), - C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR), - C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR), - C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR), - C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR), - - A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB), - A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T), - A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T), - A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T), - A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T), - A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T), - A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T), - A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T), - A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T), - A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T), - A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T), - A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T), - - C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T), - C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), - C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T), - C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), - C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T), - C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T), - C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T), - C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T), - C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T), - C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T), - C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T), - C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T), - C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T), - C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T), - - F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), - F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), - C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, 
DXT1, T), - C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), - C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), - C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), - C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), - C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), - - F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T), - F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T), - F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T), - F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T), - F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T), - F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T), - C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T), - C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T), - - C4B(BPTC_RGBA_UNORM, NONE, C0, C1, C2, C3, UNORM, BPTC, t), - C4B(BPTC_SRGBA, NONE, C0, C1, C2, C3, UNORM, BPTC, t), - F3B(BPTC_RGB_FLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_FLOAT, t), - F3B(BPTC_RGB_UFLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_UFLOAT, t), - - C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32, - IBV, 0), - C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0), - C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0), - C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32, - IRV, 0), - C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32, - IRV, 0), - F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB), - I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR), - I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR), - - F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV), - F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV), - F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV), - I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV), - I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV), - - 
F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV), - F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV), - F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV), - I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV), - I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV), - - C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16, - IBV, 0), - C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16, - ICV, 0), - C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16, - ICV, 0), - C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16, - IRV, 0), - C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16, - IRV, 0), - F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB), - F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T), - F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T), - I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T), - I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T), - - F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV), - F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV), - F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV), - I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV), - I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV), - - F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV), - F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV), - F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV), - I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV), - I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV), - - C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0), - C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0), - C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0), - F3B(R8G8B8X8_SNORM, 
RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T), - I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T), - I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T), - - F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV), - F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV), - I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV), - I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV), - - F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV), - F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV), - I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV), - I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV), - - F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T), - F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T), - F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T), - F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T), - - F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T), - - C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T), - C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T), - C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T), - C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T), - - TBLENT_B_(R8SG8SB8UX8U_NORM, 0, - C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T), - TBLENT_B_(R5SG5SB6U_NORM, 0, - C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T), - - /* vertex-only formats: */ - - C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0), - C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0), - F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV), - F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V), - F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V), - I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV), - I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV), - F3A(R32G32B32_SSCALED, NONE, 
C0, C1, C2, xx, SSCALED, 32_32_32, V), - F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V), - F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V), - F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V), - F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V), - F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V), - - C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0), - C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0), - F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V), - F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V), - F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V), - I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V), - I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V), - F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V), - F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V), - F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V), - F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V), - F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V), - F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V), - - C4A(R10G10B10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 0), - C4A(R10G10B10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 0), - C4A(B10G10R10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 1), - C4A(B10G10R10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 1), - - C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0), - C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0), - F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V), - F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V), - I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V), - I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V), - F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V), - F3A(R8G8B8_USCALED, 
NONE, C0, C1, C2, xx, USCALED, 8_8_8, V), - F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V), - F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V), - F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V), - F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V), + VF(A, B8G8R8A8_UNORM, UNORM, 8_8_8_8, 1), + VF(A, R8G8B8A8_UNORM, UNORM, 8_8_8_8, 0), + + VF(A, R10G10B10A2_UNORM, UNORM, 10_10_10_2, 0), + VF(A, B10G10R10A2_UNORM, UNORM, 10_10_10_2, 1), + VF(A, R10G10B10A2_SNORM, SNORM, 10_10_10_2, 0), + VF(A, B10G10R10A2_SNORM, SNORM, 10_10_10_2, 1), + VF(A, R10G10B10A2_UINT, UINT, 10_10_10_2, 0), + VF(A, B10G10R10A2_UINT, UINT, 10_10_10_2, 0), + + VF(A, R11G11B10_FLOAT, FLOAT, 11_11_10, 0), + + VF(A, R32G32B32A32_FLOAT, FLOAT, 32_32_32_32, 0), + VF(A, R32G32B32A32_UNORM, UNORM, 32_32_32_32, 0), + VF(A, R32G32B32A32_SNORM, SNORM, 32_32_32_32, 0), + VF(A, R32G32B32A32_SINT, SINT, 32_32_32_32, 0), + VF(A, R32G32B32A32_UINT, UINT, 32_32_32_32, 0), + + VF(A, R32G32_FLOAT, FLOAT, 32_32, 0), + VF(A, R32G32_UNORM, UNORM, 32_32, 0), + VF(A, R32G32_SNORM, SNORM, 32_32, 0), + VF(A, R32G32_SINT, SINT, 32_32, 0), + VF(A, R32G32_UINT, UINT, 32_32, 0), + + VF(A, R32_FLOAT, FLOAT, 32, 0), + VF(A, R32_UNORM, UNORM, 32, 0), + VF(A, R32_SNORM, SNORM, 32, 0), + VF(A, R32_SINT, SINT, 32, 0), + VF(A, R32_UINT, UINT, 32, 0), + + VF(A, R16G16B16A16_FLOAT, FLOAT, 16_16_16_16, 0), + VF(A, R16G16B16A16_UNORM, UNORM, 16_16_16_16, 0), + VF(A, R16G16B16A16_SNORM, SNORM, 16_16_16_16, 0), + VF(A, R16G16B16A16_SINT, SINT, 16_16_16_16, 0), + VF(A, R16G16B16A16_UINT, UINT, 16_16_16_16, 0), + + VF(A, R16G16_FLOAT, FLOAT, 16_16, 0), + VF(A, R16G16_UNORM, UNORM, 16_16, 0), + VF(A, R16G16_SNORM, SNORM, 16_16, 0), + VF(A, R16G16_SINT, SINT, 16_16, 0), + VF(A, R16G16_UINT, UINT, 16_16, 0), + + VF(A, R16_FLOAT, FLOAT, 16, 0), + VF(A, R16_UNORM, UNORM, 16, 0), + VF(A, R16_SNORM, SNORM, 16, 0), + VF(A, R16_SINT, SINT, 16, 0), + VF(A, R16_UINT, UINT, 16, 0), + + VF(A, R8G8B8A8_SNORM, SNORM, 8_8_8_8, 
0), + VF(A, R8G8B8A8_SINT, SINT, 8_8_8_8, 0), + VF(A, R8G8B8A8_UINT, UINT, 8_8_8_8, 0), + + VF(A, R8G8_UNORM, UNORM, 8_8, 0), + VF(A, R8G8_SNORM, SNORM, 8_8, 0), + VF(A, R8G8_SINT, SINT, 8_8, 0), + VF(A, R8G8_UINT, UINT, 8_8, 0), + + VF(A, R8_UNORM, UNORM, 8, 0), + VF(A, R8_SNORM, SNORM, 8, 0), + VF(A, R8_SINT, SINT, 8, 0), + VF(A, R8_UINT, UINT, 8, 0), + + VF(A, R32G32B32A32_SSCALED, SSCALED, 32_32_32_32, 0), + VF(A, R32G32B32A32_USCALED, USCALED, 32_32_32_32, 0), + VF(A, R32G32B32_FLOAT, FLOAT, 32_32_32, 0), + VF(A, R32G32B32_UNORM, UNORM, 32_32_32, 0), + VF(A, R32G32B32_SNORM, SNORM, 32_32_32, 0), + VF(A, R32G32B32_SINT, SINT, 32_32_32, 0), + VF(A, R32G32B32_UINT, UINT, 32_32_32, 0), + VF(A, R32G32B32_SSCALED, SSCALED, 32_32_32, 0), + VF(A, R32G32B32_USCALED, USCALED, 32_32_32, 0), + VF(A, R32G32_SSCALED, SSCALED, 32_32, 0), + VF(A, R32G32_USCALED, USCALED, 32_32, 0), + VF(A, R32_SSCALED, SSCALED, 32, 0), + VF(A, R32_USCALED, USCALED, 32, 0), + + VF(A, R16G16B16A16_SSCALED, SSCALED, 16_16_16_16, 0), + VF(A, R16G16B16A16_USCALED, USCALED, 16_16_16_16, 0), + VF(A, R16G16B16_FLOAT, FLOAT, 16_16_16, 0), + VF(A, R16G16B16_UNORM, UNORM, 16_16_16, 0), + VF(A, R16G16B16_SNORM, SNORM, 16_16_16, 0), + VF(A, R16G16B16_SINT, SINT, 16_16_16, 0), + VF(A, R16G16B16_UINT, UINT, 16_16_16, 0), + VF(A, R16G16B16_SSCALED, SSCALED, 16_16_16, 0), + VF(A, R16G16B16_USCALED, USCALED, 16_16_16, 0), + VF(A, R16G16_SSCALED, SSCALED, 16_16, 0), + VF(A, R16G16_USCALED, USCALED, 16_16, 0), + VF(A, R16_SSCALED, SSCALED, 16, 0), + VF(A, R16_USCALED, USCALED, 16, 0), + + VF(A, R10G10B10A2_USCALED, USCALED, 10_10_10_2, 0), + VF(A, R10G10B10A2_SSCALED, SSCALED, 10_10_10_2, 0), + VF(A, B10G10R10A2_USCALED, USCALED, 10_10_10_2, 1), + VF(A, B10G10R10A2_SSCALED, SSCALED, 10_10_10_2, 1), + + VF(A, R8G8B8A8_SSCALED, SSCALED, 8_8_8_8, 0), + VF(A, R8G8B8A8_USCALED, USCALED, 8_8_8_8, 0), + VF(A, R8G8B8_UNORM, UNORM, 8_8_8, 0), + VF(A, R8G8B8_SNORM, SNORM, 8_8_8, 0), + VF(A, R8G8B8_SINT, SINT, 8_8_8, 0), + 
VF(A, R8G8B8_UINT, UINT, 8_8_8, 0), + VF(A, R8G8B8_SSCALED, SSCALED, 8_8_8, 0), + VF(A, R8G8B8_USCALED, USCALED, 8_8_8, 0), + VF(A, R8G8_SSCALED, SSCALED, 8_8, 0), + VF(A, R8G8_USCALED, USCALED, 8_8, 0), + VF(A, R8_SSCALED, SSCALED, 8, 0), + VF(A, R8_USCALED, USCALED, 8, 0), /* FIXED types: not supported natively, converted on VBO push */ - C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), - F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), - F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), - F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V), + VF(x, R32G32B32A32_FIXED, xx, xx, xx), + VF(x, R32G32B32_FIXED, xx, xx, xx), + VF(x, R32G32_FIXED, xx, xx, xx), + VF(x, R32_FIXED, xx, xx, xx), - C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), - F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), - F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), - F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V), + VF(x, R64G64B64A64_FLOAT, xx, xx, xx), + VF(x, R64G64B64_FLOAT, xx, xx, xx), + VF(x, R64G64_FLOAT, xx, xx, xx), + VF(x, R64_FLOAT, xx, xx, xx), }; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index 79c7023b2d4..be19c0fdc85 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) struct pipe_context *pipe = &nv50->base.pipe; struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + struct pipe_grid_info info = {}; uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, 1, 1 }; const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; - int c; + int c, i; if (unlikely(!screen->pm.prog)) { struct nv50_program *prog = CALLOC_STRUCT(nv50_program); @@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context 
*nv50, struct nv50_hw_query *hq) pipe->bind_compute_state(pipe, screen->pm.prog); input[0] = hq->bo->offset + hq->base_offset; input[1] = hq->sequence; - pipe->launch_grid(pipe, block, grid, 0, input); + + for (i = 0; i < 3; i++) { + info.block[i] = block[i]; + info.grid[i] = grid[i]; + } + info.pc = 0; + info.input = input; + pipe->launch_grid(pipe, &info); nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 14d0085975b..8d11dd7bf21 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -72,7 +72,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_SHARED); - return (nv50_format_table[format].usage & bindings) == bindings; + return (( nv50_format_table[format].usage | + nv50_vertex_format[format].usage) & bindings) == bindings; } static int @@ -263,8 +264,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_COMPUTE: break; + case PIPE_SHADER_COMPUTE: default: return 0; } @@ -315,6 +316,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_SUPPORTED_IRS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -562,7 +565,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) if (screen->tesla->oclass >= NVA0_3D_CLASS) { BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); - PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + PUSH_DATA (push, 0); } BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 2a4983d1020..cce92f0dd5e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -50,6 +50,7 @@ struct nv50_graph_state { uint8_t num_samplers[3]; uint8_t prim_size; uint16_t scissor; + bool seamless_cube_map; }; struct nv50_screen { @@ -156,12 +157,27 @@ nv50_resource_validate(struct nv04_resource *res, uint32_t flags) struct nv50_format { uint32_t rt; - uint32_t tic; + struct { + unsigned format:6; + unsigned type_r:3; + unsigned type_g:3; + unsigned type_b:3; + unsigned type_a:3; + unsigned src_x:3; + unsigned src_y:3; + unsigned src_z:3; + unsigned src_w:3; + } tic; + uint32_t usage; +}; + +struct nv50_vertex_format { uint32_t vtx; uint32_t usage; }; extern const struct nv50_format nv50_format_table[]; +extern const struct nv50_vertex_format nv50_vertex_format[]; static inline void nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index cb040439139..6a09808807a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -21,6 +21,7 @@ */ #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -33,7 +34,7 @@ #include "nv50/nv50_query_hw.h" #include "nv50/nv50_3d.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_texture.xml.h" #include "nouveau_gldefs.h" @@ -437,24 +438,29 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso) /* ====================== SAMPLERS AND TEXTURES ================================ */ -#define NV50_TSC_WRAP_CASE(n) \ - case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n - static inline unsigned nv50_tsc_wrap_mode(unsigned wrap) { switch (wrap) { - NV50_TSC_WRAP_CASE(REPEAT); - NV50_TSC_WRAP_CASE(MIRROR_REPEAT); - 
NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(CLAMP); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + case PIPE_TEX_WRAP_REPEAT: + return G80_TSC_WRAP_WRAP; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return G80_TSC_WRAP_MIRROR; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return G80_TSC_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return G80_TSC_WRAP_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return G80_TSC_WRAP_CLAMP_OGL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return G80_TSC_WRAP_MIRROR_ONCE_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL; default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - return NV50_TSC_WRAP_REPEAT; + return G80_TSC_WRAP_WRAP; } } @@ -474,42 +480,44 @@ nv50_sampler_state_create(struct pipe_context *pipe, switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - so->tsc[1] = NV50_TSC_1_MAGF_LINEAR; + so->tsc[1] = G80_TSC_1_MAG_FILTER_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - so->tsc[1] = NV50_TSC_1_MAGF_NEAREST; + so->tsc[1] = G80_TSC_1_MAG_FILTER_NEAREST; break; } switch (cso->min_img_filter) { case PIPE_TEX_FILTER_LINEAR: - so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; + so->tsc[1] |= G80_TSC_1_MIN_FILTER_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; + so->tsc[1] |= G80_TSC_1_MIN_FILTER_NEAREST; break; } switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_LINEAR: - so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_LINEAR; break; case PIPE_TEX_MIPFILTER_NEAREST: - so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_NEAREST; break; case PIPE_TEX_MIPFILTER_NONE: default: - so->tsc[1] |= NV50_TSC_1_MIPF_NONE; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_NONE; break; } if 
(nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { if (cso->seamless_cube_map) - so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; + so->tsc[1] |= GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING; if (!cso->normalized_coords) - so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; + so->tsc[1] |= GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS; + } else { + so->seamless_cube_map = cso->seamless_cube_map; } if (cso->max_anisotropy >= 16) @@ -521,10 +529,10 @@ nv50_sampler_state_create(struct pipe_context *pipe, so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; if (cso->max_anisotropy >= 4) - so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; + so->tsc[1] |= 6 << G80_TSC_1_TRILIN_OPT__SHIFT; else if (cso->max_anisotropy >= 2) - so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; + so->tsc[1] |= 4 << G80_TSC_1_TRILIN_OPT__SHIFT; } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { @@ -936,21 +944,10 @@ nv50_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv50_context *nv50 = nv50_context(pipe); - unsigned i; nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - for (i = 0; i < fb->nr_cbufs; ++i) - pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]); - for (; i < nv50->framebuffer.nr_cbufs; ++i) - pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL); - - nv50->framebuffer.nr_cbufs = fb->nr_cbufs; - - nv50->framebuffer.width = fb->width; - nv50->framebuffer.height = fb->height; - - pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf); + util_copy_framebuffer_state(&nv50->framebuffer, fb); nv50->dirty |= NV50_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 4af969997f2..55369781606 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -2,7 +2,6 @@ #include "util/u_format.h" #include "nv50/nv50_context.h" -#include 
"nv50/nv50_defs.xml.h" static inline void nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h index e0793bb6ec4..6bc451450b1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h @@ -7,6 +7,7 @@ struct nv50_tsc_entry { int id; uint32_t tsc[8]; + bool seamless_cube_map; }; static inline struct nv50_tsc_entry * diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index c69fa5abb98..4db73cb7fef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -37,8 +37,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_resource.h" -#include "nv50/nv50_defs.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_defs.xml.h" +#include "nv50/g80_texture.xml.h" /* these are used in nv50_blit.h */ #define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL @@ -64,15 +64,15 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) switch (util_format_get_blocksize(format)) { case 1: - return NV50_SURFACE_FORMAT_R8_UNORM; + return G80_SURFACE_FORMAT_R8_UNORM; case 2: - return NV50_SURFACE_FORMAT_R16_UNORM; + return G80_SURFACE_FORMAT_R16_UNORM; case 4: - return NV50_SURFACE_FORMAT_BGRA8_UNORM; + return G80_SURFACE_FORMAT_BGRA8_UNORM; case 8: - return NV50_SURFACE_FORMAT_RGBA16_FLOAT; + return G80_SURFACE_FORMAT_RGBA16_FLOAT; case 16: - return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + return G80_SURFACE_FORMAT_RGBA32_FLOAT; default: return 0; } @@ -628,7 +628,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe, offset &= ~0xff; BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 
262144); @@ -638,7 +638,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe, PUSH_DATA (push, buf->address + offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, size); PUSH_DATA (push, 1); @@ -997,12 +997,14 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit) blit->sampler[0].id = -1; - blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT); blit->sampler[0].tsc[1] = - NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_NEAREST | + G80_TSC_1_MIN_FILTER_NEAREST | + G80_TSC_1_MIP_FILTER_NONE; /* clamp to edge, min/max lod = 0, bilinear filtering */ @@ -1010,7 +1012,9 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit) blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; blit->sampler[1].tsc[1] = - NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_LINEAR | + G80_TSC_1_MIN_FILTER_LINEAR | + G80_TSC_1_MIP_FILTER_NONE; } unsigned diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index c3f433608df..4b69c3bd504 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -22,32 +22,24 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_resource.h" -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_texture.xml.h" +#include 
"nv50/g80_defs.xml.h" #include "util/u_format.h" -#define NV50_TIC_0_SWIZZLE__MASK \ - (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ - NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) - static inline uint32_t -nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int) +nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int) { switch (swz) { - case PIPE_SWIZZLE_RED: - return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; - case PIPE_SWIZZLE_GREEN: - return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; - case PIPE_SWIZZLE_BLUE: - return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; - case PIPE_SWIZZLE_ALPHA: - return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_RED : return fmt->tic.src_x; + case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y; + case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z; + case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w; case PIPE_SWIZZLE_ONE: - return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + return tex_int ? 
G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT; case PIPE_SWIZZLE_ZERO: default: - return NV50_TIC_MAP_ZERO; + return G80_TIC_SOURCE_ZERO; } } @@ -73,6 +65,7 @@ nv50_create_texture_view(struct pipe_context *pipe, { const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d; const struct util_format_description *desc; + const struct nv50_format *fmt; uint64_t addr; uint32_t *tic; uint32_t swz[4]; @@ -100,19 +93,23 @@ nv50_create_texture_view(struct pipe_context *pipe, /* TIC[0] */ - tic[0] = nv50_format_table[view->pipe.format].tic; + fmt = &nv50_format_table[view->pipe.format]; tex_int = util_format_is_pure_integer(view->pipe.format); - swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); - swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); - swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); - swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); - tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | - (swz[0] << NV50_TIC_0_MAPR__SHIFT) | - (swz[1] << NV50_TIC_0_MAPG__SHIFT) | - (swz[2] << NV50_TIC_0_MAPB__SHIFT) | - (swz[3] << NV50_TIC_0_MAPA__SHIFT); + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) | + (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) | + (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) | + (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) | + (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) | + (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) | + (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) | + (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) | + (swz[3] << G80_TIC_0_W_SOURCE__SHIFT); addr = mt->base.address; @@ -124,24 +121,24 @@ nv50_create_texture_view(struct pipe_context *pipe, depth = view->pipe.u.tex.last_layer - 
view->pipe.u.tex.first_layer + 1; } - tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + tic[2] |= G80_TIC_2_SRGB_CONVERSION; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) - tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + tic[2] |= G80_TIC_2_NORMALIZED_COORDS; if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (target == PIPE_BUFFER) { addr += view->pipe.u.buf.first_element * desc->block.bits / 8; - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | (mt->base.base.height0); @@ -162,34 +159,34 @@ nv50_create_texture_view(struct pipe_context *pipe, switch (target) { case PIPE_TEXTURE_1D: - tic[2] |= NV50_TIC_2_TARGET_1D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D; break; case PIPE_TEXTURE_2D: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_RECT: - tic[2] |= NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; break; case PIPE_TEXTURE_3D: - tic[2] |= NV50_TIC_2_TARGET_3D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D; break; case PIPE_TEXTURE_CUBE: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP; break; case PIPE_TEXTURE_1D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; - tic[2] |= 
NV50_TIC_2_TARGET_CUBE_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY; break; case PIPE_BUFFER: assert(0); /* should be linear and handled above ! */ - tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH; break; default: unreachable("unexpected/invalid texture target"); @@ -202,9 +199,9 @@ nv50_create_texture_view(struct pipe_context *pipe, tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; tic[5] |= depth << 16; if (class_3d > NV50_3D_CLASS) - tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT; else - tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT; tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ @@ -213,9 +210,9 @@ nv50_create_texture_view(struct pipe_context *pipe, else tic[7] = 0; - if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS))) + if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS))) if (mt->base.base.last_level) - tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK; + tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK; return &view->pipe; } @@ -265,7 +262,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) tic->id = nv50_screen_tic_alloc(nv50->screen, tic); BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 262144); @@ -275,7 +272,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) PUSH_DATA (push, txc->offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, 32); PUSH_DATA (push, 1); @@ -364,6 +361,7 @@ 
nv50_validate_tsc(struct nv50_context *nv50, int s) PUSH_DATA (push, (i << 4) | 0); continue; } + nv50->seamless_cube_map = tsc->seamless_cube_map; if (tsc->id < 0) { tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h deleted file mode 100644 index a2b9921f647..00000000000 --- a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h +++ /dev/null @@ -1,306 +0,0 @@ -#ifndef NV50_TEXTURE_XML -#define NV50_TEXTURE_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/envytools/envytools/ -git clone https://github.com/envytools/envytools.git - -The rules-ng-ng source files this header was generated from are: -- rnndb/graph/g80_texture.xml ( 8881 bytes, from 2014-09-25 06:32:11) -- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49) -- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02) -- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11) - -Copyright (C) 2006-2014 by the following authors: -- Artur Huillet <[email protected]> (ahuillet) -- Ben Skeggs (darktama, darktama_) -- B. R. 
<[email protected]> (koala_br) -- Carlos Martin <[email protected]> (carlosmn) -- Christoph Bumiller <[email protected]> (calim, chrisbmr) -- Dawid Gajownik <[email protected]> (gajownik) -- Dmitry Baryshkov -- Dmitry Eremin-Solenikov <[email protected]> (lumag) -- EdB <[email protected]> (edb_) -- Erik Waling <[email protected]> (erikwaling) -- Francisco Jerez <[email protected]> (curro) -- imirkin <[email protected]> (imirkin) -- jb17bsome <[email protected]> (jb17bsome) -- Jeremy Kolb <[email protected]> (kjeremy) -- Laurent Carlier <[email protected]> (lordheavy) -- Luca Barbieri <[email protected]> (lb, lb1) -- Maarten Maathuis <[email protected]> (stillunknown) -- Marcin Kościelnicki <[email protected]> (mwk, koriakin) -- Mark Carey <[email protected]> (careym) -- Matthieu Castet <[email protected]> (mat-c) -- nvidiaman <[email protected]> (nvidiaman) -- Patrice Mandin <[email protected]> (pmandin, pmdata) -- Pekka Paalanen <[email protected]> (pq, ppaalanen) -- Peter Popov <[email protected]> (ironpeter) -- Richard Hughes <[email protected]> (hughsient) -- Rudi Cilibrasi <[email protected]> (cilibrar) -- Serge Martin -- Simon Raffeiner -- Stephane Loeuillet <[email protected]> (leroutier) -- Stephane Marchesin <[email protected]> (marcheu) -- sturmflut <[email protected]> (sturmflut) -- Sylvain Munaut <[email protected]> -- Victor Stinner <[email protected]> (haypo) -- Wladmir van der Laan <[email protected]> (miathan6) -- Younes Manton <[email protected]> (ymanton) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice
(including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -#define NV50_TIC_MAP_ZERO 0x00000000 -#define NV50_TIC_MAP_C0 0x00000002 -#define NV50_TIC_MAP_C1 0x00000003 -#define NV50_TIC_MAP_C2 0x00000004 -#define NV50_TIC_MAP_C3 0x00000005 -#define NV50_TIC_MAP_ONE_INT 0x00000006 -#define NV50_TIC_MAP_ONE_FLOAT 0x00000007 -#define NV50_TIC_TYPE_SNORM 0x00000001 -#define NV50_TIC_TYPE_UNORM 0x00000002 -#define NV50_TIC_TYPE_SINT 0x00000003 -#define NV50_TIC_TYPE_UINT 0x00000004 -#define NV50_TIC_TYPE_SSCALED 0x00000005 -#define NV50_TIC_TYPE_USCALED 0x00000006 -#define NV50_TIC_TYPE_FLOAT 0x00000007 -#define NV50_TSC_WRAP_REPEAT 0x00000000 -#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 -#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 -#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 -#define NV50_TSC_WRAP_CLAMP 0x00000004 -#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 -#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 -#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 -#define NV50_TIC__SIZE 0x00000020 -#define NV50_TIC_0 0x00000000 -#define NV50_TIC_0_MAPA__MASK 0x38000000 -#define NV50_TIC_0_MAPA__SHIFT 27 -#define NV50_TIC_0_MAPB__MASK 0x07000000 -#define NV50_TIC_0_MAPB__SHIFT 24 -#define NV50_TIC_0_MAPG__MASK 0x00e00000 -#define NV50_TIC_0_MAPG__SHIFT 21 -#define NV50_TIC_0_MAPR__MASK 0x001c0000 -#define NV50_TIC_0_MAPR__SHIFT 18 -#define NV50_TIC_0_TYPE3__MASK 0x00038000 -#define NV50_TIC_0_TYPE3__SHIFT 15 -#define 
NV50_TIC_0_TYPE2__MASK 0x00007000 -#define NV50_TIC_0_TYPE2__SHIFT 12 -#define NV50_TIC_0_TYPE1__MASK 0x00000e00 -#define NV50_TIC_0_TYPE1__SHIFT 9 -#define NV50_TIC_0_TYPE0__MASK 0x000001c0 -#define NV50_TIC_0_TYPE0__SHIFT 6 -#define NV50_TIC_0_FMT__MASK 0x0000003f -#define NV50_TIC_0_FMT__SHIFT 0 -#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 -#define NVC0_TIC_0_FMT_32_32_32 0x00000002 -#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 -#define NV50_TIC_0_FMT_32_32 0x00000004 -#define NV50_TIC_0_FMT_32_8_X24 0x00000005 -#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 -#define NV50_TIC_0_FMT_10_10_10_2 0x00000009 -#define NV50_TIC_0_FMT_16_16 0x0000000c -#define NV50_TIC_0_FMT_24_8 0x0000000d -#define NV50_TIC_0_FMT_8_24 0x0000000e -#define NV50_TIC_0_FMT_32 0x0000000f -#define NVC0_TIC_0_FMT_BPTC_FLOAT 0x00000010 -#define NVC0_TIC_0_FMT_BPTC_UFLOAT 0x00000011 -#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 -#define NV50_TIC_0_FMT_1_5_5_5 0x00000013 -#define NV50_TIC_0_FMT_5_5_5_1 0x00000014 -#define NV50_TIC_0_FMT_5_6_5 0x00000015 -#define NV50_TIC_0_FMT_5_5_6 0x00000016 -#define NVC0_TIC_0_FMT_BPTC 0x00000017 -#define NV50_TIC_0_FMT_8_8 0x00000018 -#define NV50_TIC_0_FMT_16 0x0000001b -#define NV50_TIC_0_FMT_8 0x0000001d -#define NV50_TIC_0_FMT_4_4 0x0000001e -#define NV50_TIC_0_FMT_BITMAP 0x0000001f -#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020 -#define NV50_TIC_0_FMT_11_11_10 0x00000021 -#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022 -#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023 -#define NV50_TIC_0_FMT_DXT1 0x00000024 -#define NV50_TIC_0_FMT_DXT3 0x00000025 -#define NV50_TIC_0_FMT_DXT5 0x00000026 -#define NV50_TIC_0_FMT_RGTC1 0x00000027 -#define NV50_TIC_0_FMT_RGTC2 0x00000028 -#define NV50_TIC_0_FMT_S8_Z24 0x00000029 -#define NV50_TIC_0_FMT_Z24_X8 0x0000002a -#define NV50_TIC_0_FMT_Z24_S8 0x0000002b -#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c -#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d -#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e -#define 
NV50_TIC_0_FMT_Z32 0x0000002f -#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039 -#define NV50_TIC_0_FMT_Z16 0x0000003a - -#define NV50_TIC_1 0x00000004 -#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff -#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 - -#define NV50_TIC_2 0x00000008 -#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff -#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 -#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 -#define NV50_TIC_2_TARGET__MASK 0x0003c000 -#define NV50_TIC_2_TARGET__SHIFT 14 -#define NV50_TIC_2_TARGET_1D 0x00000000 -#define NV50_TIC_2_TARGET_2D 0x00004000 -#define NV50_TIC_2_TARGET_3D 0x00008000 -#define NV50_TIC_2_TARGET_CUBE 0x0000c000 -#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 -#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 -#define NV50_TIC_2_TARGET_BUFFER 0x00018000 -#define NV50_TIC_2_TARGET_RECT 0x0001c000 -#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 -#define NV50_TIC_2_LINEAR 0x00040000 -#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000 -#define NV50_TIC_2_TILE_MODE_X__SHIFT 19 -#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 -#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 -#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 -#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 -#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 -#define NV50_TIC_2_2D_UNK0258__SHIFT 28 -#define NV50_TIC_2_NO_BORDER 0x40000000 -#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 - -#define NV50_TIC_3 0x0000000c -#define NV50_TIC_3_PITCH__MASK 0xffffffff -#define 
NV50_TIC_3_PITCH__SHIFT 0 - -#define NV50_TIC_4 0x00000010 -#define NV50_TIC_4_WIDTH__MASK 0xffffffff -#define NV50_TIC_4_WIDTH__SHIFT 0 - -#define NV50_TIC_5 0x00000014 -#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 -#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 -#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 -#define NV50_TIC_5_DEPTH__SHIFT 16 -#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff -#define NV50_TIC_5_HEIGHT__SHIFT 0 - -#define NV50_TIC_7 0x0000001c -#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f -#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 -#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 -#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 -#define NV50_TIC_7_MS_MODE__MASK 0x0000f000 -#define NV50_TIC_7_MS_MODE__SHIFT 12 -#define NV50_TIC_7_MS_MODE_MS1 0x00000000 -#define NV50_TIC_7_MS_MODE_MS2 0x00001000 -#define NV50_TIC_7_MS_MODE_MS4 0x00002000 -#define NV50_TIC_7_MS_MODE_MS8 0x00003000 -#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000 -#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000 -#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000 -#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000 -#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000 -#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000 -#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000 - -#define NV50_TSC__SIZE 0x00000020 -#define NV50_TSC_0 0x00000000 -#define NV50_TSC_0_WRAPS__MASK 0x00000007 -#define NV50_TSC_0_WRAPS__SHIFT 0 -#define NV50_TSC_0_WRAPT__MASK 0x00000038 -#define NV50_TSC_0_WRAPT__SHIFT 3 -#define NV50_TSC_0_WRAPR__MASK 0x000001c0 -#define NV50_TSC_0_WRAPR__SHIFT 6 -#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 -#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 -#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 -#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000 -#define NV50_TSC_0_BOX_S__MASK 0x0001c000 -#define NV50_TSC_0_BOX_S__SHIFT 14 -#define NV50_TSC_0_BOX_T__MASK 0x000e0000 -#define NV50_TSC_0_BOX_T__SHIFT 17 -#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 -#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 - -#define 
NV50_TSC_1 0x00000004 -#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 -#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 -#define NV50_TSC_1_MAGF__MASK 0x00000003 -#define NV50_TSC_1_MAGF__SHIFT 0 -#define NV50_TSC_1_MAGF_NEAREST 0x00000001 -#define NV50_TSC_1_MAGF_LINEAR 0x00000002 -#define NV50_TSC_1_MINF__MASK 0x00000030 -#define NV50_TSC_1_MINF__SHIFT 4 -#define NV50_TSC_1_MINF_NEAREST 0x00000010 -#define NV50_TSC_1_MINF_LINEAR 0x00000020 -#define NV50_TSC_1_MIPF__MASK 0x000000c0 -#define NV50_TSC_1_MIPF__SHIFT 6 -#define NV50_TSC_1_MIPF_NONE 0x00000040 -#define NV50_TSC_1_MIPF_NEAREST 0x00000080 -#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 -#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 -#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 -#define NV50_TSC_1_LOD_BIAS__SHIFT 12 -#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 - -#define NV50_TSC_2 0x00000008 -#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff -#define NV50_TSC_2_MIN_LOD__SHIFT 0 -#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000 -#define NV50_TSC_2_MAX_LOD__SHIFT 12 -#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000 -#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24 - -#define NV50_TSC_3 0x0000000c -#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000 -#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12 -#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000 -#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20 - -#define NV50_TSC_4 0x00000010 -#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff -#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 - -#define NV50_TSC_5 0x00000014 -#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff -#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 - -#define NV50_TSC_6 0x00000018 -#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff -#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 - -#define NV50_TSC_7 0x0000001c -#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff -#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 - - -#endif /* NV50_TEXTURE_XML */ diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index 9a3fd1e705f..86a8c159469 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -3,7 +3,7 @@ #include "nv50/nv50_context.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_defs.xml.h" struct nv50_transfer { struct pipe_transfer base; @@ -163,7 +163,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, offset &= ~0xff; BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 262144); @@ -173,7 +173,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, PUSH_DATA (push, dst->offset + offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, size); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 5369d5207ee..6f60445d8d2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -76,7 +76,7 @@ nv50_vertex_state_create(struct pipe_context *pipe, enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; - so->element[i].state = nv50_format_table[fmt].vtx; + so->element[i].state = nv50_vertex_format[fmt].vtx; if (!so->element[i].state) { switch (util_format_get_nr_components(fmt)) { @@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe, FREE(so); return NULL; } - so->element[i].state = nv50_format_table[fmt].vtx; + so->element[i].state = nv50_vertex_format[fmt].vtx; so->need_conversion = true; pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, "Converting vertex element %d, no hw format %s", 
@@ -816,6 +816,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_DATA (push, 0x20); } + if (nv50->screen->base.class_3d >= NVA0_3D_CLASS && + nv50->seamless_cube_map != nv50->state.seamless_cube_map) { + nv50->state.seamless_cube_map = nv50->seamless_cube_map; + BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); + PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0); + } + if (nv50->vbo_fifo) { nv50_push_vbo(nv50, info); push->kick_notify = nv50_default_kick_notify; diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index 0a0e187dc02..3479c343261 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -192,6 +192,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVF0_3D_CLASS 0x0000a197 #define NVEA_3D_CLASS 0x0000a297 #define GM107_3D_CLASS 0x0000b097 +#define GM200_3D_CLASS 0x0000b197 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 @@ -200,6 +201,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC8_COMPUTE_CLASS 0x000092c0 #define NVE4_COMPUTE_CLASS 0x0000a0c0 #define NVF0_COMPUTE_CLASS 0x0000a1c0 +#define GM107_COMPUTE_CLASS 0x0000b0c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h new file mode 100644 index 00000000000..a4bc3805f26 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h @@ -0,0 +1,365 @@ +#ifndef GM107_TEXTURE_XML +#define GM107_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/gm107_texture.xml ( 22057 bytes, from 2016-02-12 03:01:43) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-04 22:19:11) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define GM107_TIC2__SIZE 0x00000020 +#define GM107_TIC2_0 0x00000000 +#define GM107_TIC2_0_COMPONENTS_SIZES__MASK 0x0000007f +#define GM107_TIC2_0_COMPONENTS_SIZES__SHIFT 0 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32 0x00000004 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_B24G8 0x00000005 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007 +#define GM107_TIC2_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008 +#define GM107_TIC2_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16 0x0000000c +#define GM107_TIC2_0_COMPONENTS_SIZES_G8R24 0x0000000d +#define GM107_TIC2_0_COMPONENTS_SIZES_G24R8 0x0000000e +#define GM107_TIC2_0_COMPONENTS_SIZES_R32 0x0000000f +#define GM107_TIC2_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012 +#define GM107_TIC2_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013 +#define GM107_TIC2_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014 +#define GM107_TIC2_0_COMPONENTS_SIZES_B5G6R5 0x00000015 +#define GM107_TIC2_0_COMPONENTS_SIZES_B6G5R5 0x00000016 +#define GM107_TIC2_0_COMPONENTS_SIZES_G8R8 0x00000018 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16 0x0000001b +#define GM107_TIC2_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c +#define GM107_TIC2_0_COMPONENTS_SIZES_R8 0x0000001d +#define GM107_TIC2_0_COMPONENTS_SIZES_G4R4 0x0000001e +#define GM107_TIC2_0_COMPONENTS_SIZES_R1 0x0000001f +#define GM107_TIC2_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020 +#define GM107_TIC2_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021 +#define GM107_TIC2_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022 +#define GM107_TIC2_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT1 0x00000024 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT23 0x00000025 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT45 0x00000026 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXN1 0x00000027 +#define 
GM107_TIC2_0_COMPONENTS_SIZES_DXN2 0x00000028 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC7U 0x00000017 +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006 +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b +#define GM107_TIC2_0_COMPONENTS_SIZES_EAC 0x00000019 +#define GM107_TIC2_0_COMPONENTS_SIZES_EACX2 0x0000001a +#define GM107_TIC2_0_COMPONENTS_SIZES_Z24S8 0x00000029 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24 0x0000002a +#define GM107_TIC2_0_COMPONENTS_SIZES_S8Z24 0x0000002b +#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c +#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d +#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32 0x0000002f +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039 +#define GM107_TIC2_0_COMPONENTS_SIZES_Z16 0x0000003a +#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e +#define 
GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000040 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000050 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000041 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000051 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000042 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000055 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000052 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000044 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000056 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000057 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000053 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000045 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000054 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000046 +#define GM107_TIC2_0_R_DATA_TYPE__MASK 0x00000380 +#define GM107_TIC2_0_R_DATA_TYPE__SHIFT 7 +#define GM107_TIC2_0_G_DATA_TYPE__MASK 0x00001c00 +#define GM107_TIC2_0_G_DATA_TYPE__SHIFT 10 +#define GM107_TIC2_0_B_DATA_TYPE__MASK 0x0000e000 +#define GM107_TIC2_0_B_DATA_TYPE__SHIFT 13 +#define GM107_TIC2_0_A_DATA_TYPE__MASK 0x00070000 +#define GM107_TIC2_0_A_DATA_TYPE__SHIFT 16 +#define GM107_TIC2_0_X_SOURCE__MASK 0x00380000 +#define GM107_TIC2_0_X_SOURCE__SHIFT 19 +#define GM107_TIC2_0_Y_SOURCE__MASK 0x01c00000 +#define GM107_TIC2_0_Y_SOURCE__SHIFT 22 +#define GM107_TIC2_0_Z_SOURCE__MASK 0x0e000000 +#define GM107_TIC2_0_Z_SOURCE__SHIFT 25 +#define GM107_TIC2_0_W_SOURCE__MASK 0x70000000 +#define GM107_TIC2_0_W_SOURCE__SHIFT 28 +#define GM107_TIC2_0_PACK_COMPONENTS 0x80000000 + +#define GM107_TIC2_1 0x00000004 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__MASK 0xffffffff +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__SHIFT 0 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__MASK 0xffffffe0 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHIFT 5 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHR 5 +#define 
GM107_TIC2_1_GOB_DEPTH_OFFSET__MASK 0x00000060 +#define GM107_TIC2_1_GOB_DEPTH_OFFSET__SHIFT 5 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__MASK 0xfffffe00 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHIFT 9 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHR 9 + +#define GM107_TIC2_2 0x00000008 +#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__MASK 0x0000ffff +#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__SHIFT 0 +#define GM107_TIC2_2_HEADER_VERSION__MASK 0x00e00000 +#define GM107_TIC2_2_HEADER_VERSION__SHIFT 21 +#define GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER 0x00000000 +#define GM107_TIC2_2_HEADER_VERSION_PITCH_COLORKEY 0x00200000 +#define GM107_TIC2_2_HEADER_VERSION_PITCH 0x00400000 +#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR 0x00600000 +#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR_COLORKEY 0x00800000 +#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__MASK 0x1e000000 +#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__SHIFT 25 + +#define GM107_TIC2_3 0x0000000c +#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__MASK 0x0000ffff +#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__SHIFT 0 +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__MASK 0x0000ffff +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHIFT 0 +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHR 5 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MASK 0x00000007 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__SHIFT 0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_TWO 0x00000001 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_FOUR 0x00000002 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00000003 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00000004 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00000005 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__MASK 0x00000038 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__SHIFT 3 +#define 
GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_TWO 0x00000008 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00000010 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00000018 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x00000020 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x00000028 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__MASK 0x000001c0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__SHIFT 6 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_TWO 0x00000040 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_FOUR 0x00000080 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_EIGHT 0x000000c0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x00000100 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x00000140 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__MASK 0x00001c00 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__SHIFT 10 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_ONE 0x00000000 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_TWO 0x00000400 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_FOUR 0x00000800 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_EIGHT 0x00000c00 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_SIXTEEN 0x00001000 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_THIRTYTWO 0x00001400 +#define GM107_TIC2_3_GOB_3D 0x00002000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_2 0x00010000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY__MASK 0x00020000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY__SHIFT 17 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_LOW 0x00000000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH 0x00020000 +#define GM107_TIC2_3_LOD_ISO_QUALITY__MASK 0x00040000 +#define GM107_TIC2_3_LOD_ISO_QUALITY__SHIFT 18 +#define GM107_TIC2_3_LOD_ISO_QUALITY_LOW 0x00000000 +#define GM107_TIC2_3_LOD_ISO_QUALITY_HIGH 0x00040000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00180000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 19 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 
0x00000000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00080000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00100000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00180000 +#define GM107_TIC2_3_ANISO_SPREAD_SCALE__MASK 0x03e00000 +#define GM107_TIC2_3_ANISO_SPREAD_SCALE__SHIFT 21 +#define GM107_TIC2_3_USE_HEADER_OPT_CONTROL 0x04000000 +#define GM107_TIC2_3_DEPTH_TEXTURE 0x08000000 +#define GM107_TIC2_3_MAX_MIP_LEVEL__MASK 0xf0000000 +#define GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT 28 + +#define GM107_TIC2_4 0x00000010 +#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__MASK 0x0000ffff +#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__SHIFT 0 +#define GM107_TIC2_4_WIDTH_MINUS_ONE__MASK 0x0000ffff +#define GM107_TIC2_4_WIDTH_MINUS_ONE__SHIFT 0 +#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__MASK 0x00380000 +#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__SHIFT 19 +#define GM107_TIC2_4_SRGB_CONVERSION 0x00400000 +#define GM107_TIC2_4_TEXTURE_TYPE__MASK 0x07800000 +#define GM107_TIC2_4_TEXTURE_TYPE__SHIFT 23 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D 0x00000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D 0x00800000 +#define GM107_TIC2_4_TEXTURE_TYPE_THREE_D 0x01000000 +#define GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP 0x01800000 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY 0x02000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY 0x02800000 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER 0x03000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x03800000 +#define GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY 0x04000000 +#define GM107_TIC2_4_SECTOR_PROMOTION__MASK 0x18000000 +#define GM107_TIC2_4_SECTOR_PROMOTION__SHIFT 27 +#define GM107_TIC2_4_SECTOR_PROMOTION_NO_PROMOTION 0x00000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x08000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x10000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_4 0x18000000 +#define GM107_TIC2_4_BORDER_SIZE__MASK 0xe0000000 +#define 
GM107_TIC2_4_BORDER_SIZE__SHIFT 29 +#define GM107_TIC2_4_BORDER_SIZE_ONE 0x00000000 +#define GM107_TIC2_4_BORDER_SIZE_TWO 0x20000000 +#define GM107_TIC2_4_BORDER_SIZE_FOUR 0x40000000 +#define GM107_TIC2_4_BORDER_SIZE_EIGHT 0x60000000 +#define GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR 0xe0000000 + +#define GM107_TIC2_5 0x00000014 +#define GM107_TIC2_5_HEIGHT_MINUS_ONE__MASK 0x0000ffff +#define GM107_TIC2_5_HEIGHT_MINUS_ONE__SHIFT 0 +#define GM107_TIC2_5_DEPTH_MINUS_ONE__MASK 0x3fff0000 +#define GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT 16 +#define GM107_TIC2_5_NORMALIZED_COORDS 0x80000000 + +#define GM107_TIC2_6 0x00000018 +#define GM107_TIC2_6_COLOR_KEY_OP 0x00000001 +#define GM107_TIC2_6_TRILIN_OPT__MASK 0x0000003e +#define GM107_TIC2_6_TRILIN_OPT__SHIFT 1 +#define GM107_TIC2_6_MIP_LOD_BIAS__MASK 0x0007ffc0 +#define GM107_TIC2_6_MIP_LOD_BIAS__SHIFT 6 +#define GM107_TIC2_6_MIP_LOD_BIAS__RADIX 0x00000008 +#define GM107_TIC2_6_ANISO_BIAS__MASK 0x00780000 +#define GM107_TIC2_6_ANISO_BIAS__SHIFT 19 +#define GM107_TIC2_6_ANISO_BIAS__RADIX 0x00000004 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000 +#define GM107_TIC2_6_MAX_ANISOTROPY__MASK 0x38000000 +#define GM107_TIC2_6_MAX_ANISOTROPY__SHIFT 27 +#define GM107_TIC2_6_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1 0x08000000 +#define 
GM107_TIC2_6_MAX_ANISOTROPY_4_TO_1 0x10000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_6_TO_1 0x18000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_8_TO_1 0x20000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_10_TO_1 0x28000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_12_TO_1 0x30000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_16_TO_1 0x38000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000 + +#define GM107_TIC2_7 0x0000001c +#define GM107_TIC2_7_COLOR_KEY_VALUE__MASK 0xffffffff +#define GM107_TIC2_7_COLOR_KEY_VALUE__SHIFT 0 +#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f +#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0 +#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0 +#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__MASK 0x00000f00 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT 8 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_1X1 0x00000000 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1 0x00000100 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2 0x00000200 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2 0x00000300 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00000400 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00000500 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X4 0x00000600 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00000800 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00000900 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x00000a00 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x00000b00 +#define GM107_TIC2_7_MIN_LOD_CLAMP__MASK 0x00fff000 +#define GM107_TIC2_7_MIN_LOD_CLAMP__SHIFT 12 +#define GM107_TIC2_7_MIN_LOD_CLAMP__RADIX 0x00000008 + + 
+#endif /* GM107_TEXTURE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile index 1c0f5835973..52fb0a54812 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile +++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile @@ -1,5 +1,5 @@ ENVYAS?=envyas -TARGETS=com9097.mme.h +TARGETS=com9097.mme.h com90c0.mme.h all: $(TARGETS) diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme new file mode 100644 index 00000000000..a9233ad8015 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme @@ -0,0 +1,24 @@ +/* NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT + * + * arg = num_groups_x + * parm[0] = num_groups_y + * parm[1] = num_groups_z + */ +.section #mme90c0_launch_grid_indirect + parm $r2 maddr 0x108e /* GRIDDIM_YX */ + braz $r1 #fail + parm $r3 + braz annul $r2 #fail + braz annul $r3 #fail + send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */ + send $r3 + maddrsend 0xa7 /* COMPUTE_BEGIN */ + maddrsend 0x282 /* UNKA08 */ + maddr 0xda /* LAUNCH */ + send 0x1000 + maddrsend 0x281 /* COMPUTE_END */ + exit maddr 0xd8 /* UNK360 */ + send 0x1 +fail: + exit + nop diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h new file mode 100644 index 00000000000..1dc06e5e690 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h @@ -0,0 +1,19 @@ +uint32_t mme90c0_launch_grid_indirect[] = { + 0x04238251, + 0x00034807, + 0x00000301, +/* 0x000e: fail */ + 0x0002d027, + 0x00029827, + 0x84008842, + 0x00001841, + 0x0029c071, + 0x00a08071, + 0x00368021, + 0x04000041, + 0x00a04071, + 0x003600a1, + 0x00004041, + 0x00000091, + 0x00000011, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 71804343138..0f1265f5db5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -59,53 +59,63 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, if (ret) return ret; - BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ - BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); + BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); - BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); + BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); - BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); + BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ - BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 0); - BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); + BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); - BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); - BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); + BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); - PUSH_DATA (push, 1 << 24); + BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); + PUSH_DATA (push, 0xff << 24); /* shared memory setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); + BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 
1); - PUSH_DATA (push, 2 << 24); - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); + BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); + PUSH_DATA (push, 0xfe << 24); + BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - /* TODO: textures & samplers */ + /* textures */ + BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + + /* samplers */ + BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); return 0; } @@ -130,7 +140,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) if (likely(prog->code_size)) { if (nvc0_program_upload_code(nvc0, prog)) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); return true; } @@ -138,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) return false; } +static void +nvc0_compute_validate_samplers(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tsc(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_textures(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tic(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + + while 
(nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = s << 16; + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + if (nvc0->state.uniform_buffer_bound[s] < size) { + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + } + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), + base, nvc0->state.uniform_buffer_bound[s], + 0, (size + 3) / 4, + nvc0->constbuf[s][0].u.data); + } else { + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[s][i].u.buf); + if (res) { + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 1); + + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); + + res->cb_bindings[s] |= 1 << i; + } else { + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 0); + } + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + } + + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); +} + +static void +nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + PUSH_DATA (push, 
screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (15 << 8) | 1); + + nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; +} + +static void +nvc0_compute_validate_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + int i; + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); + PUSH_DATA (push, 512); + + for (i = 0; i < NVC0_MAX_BUFFERS; i++) { + if (nvc0->buffers[s][i].buffer) { + struct nv04_resource *res = + nv04_resource(nvc0->buffers[s][i].buffer); + PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); + PUSH_DATA (push, 0); + BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } +} + static bool nvc0_compute_state_validate(struct nvc0_context *nvc0) { if (!nvc0_compute_validate_program(nvc0)) return false; - - /* TODO: textures, samplers, surfaces, global memory buffers */ + if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF) + nvc0_compute_validate_constbufs(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST) + nvc0_compute_validate_driverconst(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS) + nvc0_compute_validate_buffers(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) + nvc0_compute_validate_textures(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) + nvc0_compute_validate_samplers(nvc0); + + /* TODO: surfaces, global memory buffers */ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false); @@ -166,32 +312,29 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) 
struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { - BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, align(cp->parm_size, 0x100)); PUSH_DATAh(push, screen->parm->offset); PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ - BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); + BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); PUSH_DATAp(push, input, cp->parm_size / 4); - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); } } void -nvc0_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, - const void *input) +nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; - unsigned s, i; + unsigned s; int ret; ret = !nvc0_compute_state_validate(nvc0); @@ -200,59 +343,69 @@ nvc0_launch_grid(struct pipe_context *pipe, return; } - nvc0_compute_upload_input(nvc0, input); + nvc0_compute_upload_input(nvc0, info->input); - BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); - PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); - BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); + BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); + BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); - PUSH_DATA 
(push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); PUSH_DATA (push, cp->num_barriers); - BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); + BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); - /* grid/block setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); - PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); - PUSH_DATA (push, grid_layout[2]); - BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); - PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); - PUSH_DATA (push, block_layout[2]); - /* launch preliminary setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); + BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); PUSH_DATA (push, 0x1); - BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); + BEGIN_NVC0(push, SUBC_CP(0x036c), 1); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); - /* kernel launching */ - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); - PUSH_DATA (push, 0x1000); - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); - PUSH_DATA (push, 0x1); + /* block setup */ + BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); + PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); + PUSH_DATA (push, info->block[2]); + + if (unlikely(info->indirect)) { + struct nv04_resource *res = nv04_resource(info->indirect); + uint32_t offset = res->offset + info->indirect_offset; + unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; + + nouveau_pushbuf_space(push, 16, 0, 1); + PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); + nouveau_pushbuf_data(push, res->bo, offset, + 
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); + } else { + /* grid setup */ + BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); + PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); + PUSH_DATA (push, info->grid[2]); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_CP(0x0360), 1); + PUSH_DATA (push, 0x1); + } - /* rebind all the 3D constant buffers - * (looks like binding a CB on COMPUTE clobbers 3D state) */ - nvc0->dirty |= NVC0_NEW_CONSTBUF; + /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; for (s = 0; s < 5; s++) { - for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) - if (nvc0->constbuf[s][i].u.buf) - nvc0->constbuf_dirty[s] |= 1 << i; + nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; + nvc0->state.uniform_buffer_bound[s] = 0; } - memset(nvc0->state.uniform_buffer_bound, 0, - sizeof(nvc0->state.uniform_buffer_bound)); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h index 168a6d1bee2..a23f7f39dda 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h @@ -1,7 +1,6 @@ #ifndef NVC0_COMPUTE_H #define NVC0_COMPUTE_H -#include "nv50/nv50_defs.xml.h" #include "nvc0/nvc0_compute.xml.h" bool diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 547b8f5d309..007cccfd10b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -194,8 +194,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && 
nvc0->framebuffer.cbufs[i]->texture == res) { - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); if (!--ref) return ref; } @@ -204,8 +204,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (res->bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); if (!--ref) return ref; } @@ -214,16 +214,16 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (res->target == PIPE_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { - nvc0->dirty |= NVC0_NEW_ARRAYS; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); if (!--ref) return ref; } } if (nvc0->idxbuf.buffer == res) { - nvc0->dirty |= NVC0_NEW_IDXBUF; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); if (!--ref) return ref; } @@ -233,35 +233,45 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (nvc0->textures[s][i] && nvc0->textures[s][i]->texture == res) { nvc0->textures_dirty[s] |= 1 << i; - nvc0->dirty |= NVC0_NEW_TEXTURES; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); if (!--ref) return ref; } } } - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) { if (!(nvc0->constbuf_valid[s] & (1 << i))) continue; if (!nvc0->constbuf[s][i].user && nvc0->constbuf[s][i].u.buf == 
res) { - nvc0->dirty |= NVC0_NEW_CONSTBUF; nvc0->constbuf_dirty[s] |= 1 << i; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + if (unlikely(s == 5)) { + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); + } else { + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i)); + } if (!--ref) return ref; } } } - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { for (i = 0; i < NVC0_MAX_BUFFERS; ++i) { if (nvc0->buffers[s][i].buffer == res) { nvc0->buffers_dirty[s] |= 1 << i; - nvc0->dirty |= NVC0_NEW_BUFFERS; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); + if (unlikely(s == 5)) { + nvc0->dirty_cp |= NVC0_NEW_CP_BUFFERS; + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF); + } else { + nvc0->dirty_3d |= NVC0_NEW_3D_BUFFERS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF); + } if (!--ref) return ref; } @@ -342,7 +352,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) if (!nvc0->tcp_empty) goto out_err; /* set the empty tctl prog on next draw in case one is never set */ - nvc0->dirty |= NVC0_NEW_TCTLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG; + + /* Do not bind the COMPUTE driver constbuf at screen initialization because + * CBs are aliased between 3D and COMPUTE, but make sure it will be bound if + * a grid is launched later. */ + nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; /* now that there are no more opportunities for errors, set the current * context if there isn't already one. 
@@ -358,11 +373,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc); if (screen->compute) { BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); } @@ -370,13 +386,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR; if (screen->poly_cache) - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->poly_cache); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls); flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 4a6ea867e85..d3e3a818910 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -29,34 +29,35 @@ #include "nvc0/nve4_p2mf.xml.h" #include "nvc0/nvc0_macros.h" -/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */ -#define NVC0_NEW_BLEND (1 << 0) -#define 
NVC0_NEW_RASTERIZER (1 << 1) -#define NVC0_NEW_ZSA (1 << 2) -#define NVC0_NEW_VERTPROG (1 << 3) -#define NVC0_NEW_TCTLPROG (1 << 4) -#define NVC0_NEW_TEVLPROG (1 << 5) -#define NVC0_NEW_GMTYPROG (1 << 6) -#define NVC0_NEW_FRAGPROG (1 << 7) -#define NVC0_NEW_BLEND_COLOUR (1 << 8) -#define NVC0_NEW_STENCIL_REF (1 << 9) -#define NVC0_NEW_CLIP (1 << 10) -#define NVC0_NEW_SAMPLE_MASK (1 << 11) -#define NVC0_NEW_FRAMEBUFFER (1 << 12) -#define NVC0_NEW_STIPPLE (1 << 13) -#define NVC0_NEW_SCISSOR (1 << 14) -#define NVC0_NEW_VIEWPORT (1 << 15) -#define NVC0_NEW_ARRAYS (1 << 16) -#define NVC0_NEW_VERTEX (1 << 17) -#define NVC0_NEW_CONSTBUF (1 << 18) -#define NVC0_NEW_TEXTURES (1 << 19) -#define NVC0_NEW_SAMPLERS (1 << 20) -#define NVC0_NEW_TFB_TARGETS (1 << 21) -#define NVC0_NEW_IDXBUF (1 << 22) -#define NVC0_NEW_SURFACES (1 << 23) -#define NVC0_NEW_MIN_SAMPLES (1 << 24) -#define NVC0_NEW_TESSFACTOR (1 << 25) -#define NVC0_NEW_BUFFERS (1 << 26) +/* NOTE: must keep NVC0_NEW_3D_...PROG in consecutive bits in this order */ +#define NVC0_NEW_3D_BLEND (1 << 0) +#define NVC0_NEW_3D_RASTERIZER (1 << 1) +#define NVC0_NEW_3D_ZSA (1 << 2) +#define NVC0_NEW_3D_VERTPROG (1 << 3) +#define NVC0_NEW_3D_TCTLPROG (1 << 4) +#define NVC0_NEW_3D_TEVLPROG (1 << 5) +#define NVC0_NEW_3D_GMTYPROG (1 << 6) +#define NVC0_NEW_3D_FRAGPROG (1 << 7) +#define NVC0_NEW_3D_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_3D_STENCIL_REF (1 << 9) +#define NVC0_NEW_3D_CLIP (1 << 10) +#define NVC0_NEW_3D_SAMPLE_MASK (1 << 11) +#define NVC0_NEW_3D_FRAMEBUFFER (1 << 12) +#define NVC0_NEW_3D_STIPPLE (1 << 13) +#define NVC0_NEW_3D_SCISSOR (1 << 14) +#define NVC0_NEW_3D_VIEWPORT (1 << 15) +#define NVC0_NEW_3D_ARRAYS (1 << 16) +#define NVC0_NEW_3D_VERTEX (1 << 17) +#define NVC0_NEW_3D_CONSTBUF (1 << 18) +#define NVC0_NEW_3D_TEXTURES (1 << 19) +#define NVC0_NEW_3D_SAMPLERS (1 << 20) +#define NVC0_NEW_3D_TFB_TARGETS (1 << 21) +#define NVC0_NEW_3D_IDXBUF (1 << 22) +#define NVC0_NEW_3D_SURFACES (1 << 23) +#define 
NVC0_NEW_3D_MIN_SAMPLES (1 << 24) +#define NVC0_NEW_3D_TESSFACTOR (1 << 25) +#define NVC0_NEW_3D_BUFFERS (1 << 26) +#define NVC0_NEW_3D_DRIVERCONST (1 << 27) #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) @@ -64,20 +65,22 @@ #define NVC0_NEW_CP_SAMPLERS (1 << 3) #define NVC0_NEW_CP_CONSTBUF (1 << 4) #define NVC0_NEW_CP_GLOBALS (1 << 5) +#define NVC0_NEW_CP_DRIVERCONST (1 << 6) +#define NVC0_NEW_CP_BUFFERS (1 << 7) /* 3d bufctx (during draw_vbo, blit_3d) */ -#define NVC0_BIND_FB 0 -#define NVC0_BIND_VTX 1 -#define NVC0_BIND_VTX_TMP 2 -#define NVC0_BIND_IDX 3 -#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i)) -#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i)) -#define NVC0_BIND_TFB 244 -#define NVC0_BIND_SUF 245 -#define NVC0_BIND_BUF 246 -#define NVC0_BIND_SCREEN 247 -#define NVC0_BIND_TLS 249 -#define NVC0_BIND_3D_COUNT 250 +#define NVC0_BIND_3D_FB 0 +#define NVC0_BIND_3D_VTX 1 +#define NVC0_BIND_3D_VTX_TMP 2 +#define NVC0_BIND_3D_IDX 3 +#define NVC0_BIND_3D_TEX(s, i) ( 4 + 32 * (s) + (i)) +#define NVC0_BIND_3D_CB(s, i) (164 + 16 * (s) + (i)) +#define NVC0_BIND_3D_TFB 244 +#define NVC0_BIND_3D_SUF 245 +#define NVC0_BIND_3D_BUF 246 +#define NVC0_BIND_3D_SCREEN 247 +#define NVC0_BIND_3D_TLS 249 +#define NVC0_BIND_3D_COUNT 250 /* compute bufctx (during launch_grid) */ #define NVC0_BIND_CP_CB(i) ( 0 + (i)) @@ -87,7 +90,8 @@ #define NVC0_BIND_CP_DESC 50 #define NVC0_BIND_CP_SCREEN 51 #define NVC0_BIND_CP_QUERY 52 -#define NVC0_BIND_CP_COUNT 53 +#define NVC0_BIND_CP_BUF 53 +#define NVC0_BIND_CP_COUNT 54 /* bufctx for other operations */ #define NVC0_BIND_2D 0 @@ -114,7 +118,7 @@ struct nvc0_context { const struct nv50_m2mf_rect *src, uint32_t nblocksx, uint32_t nblocksy); - uint32_t dirty; + uint32_t dirty_3d; /* dirty flags for 3d state */ uint32_t dirty_cp; /* dirty flags for compute state */ struct nvc0_graph_state state; @@ -157,6 +161,7 @@ struct nvc0_context { struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS]; unsigned 
num_samplers[6]; uint16_t samplers_dirty[6]; + bool seamless_cube_map; uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */ @@ -267,6 +272,8 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers, extern void nvc0_init_surface_functions(struct nvc0_context *); /* nvc0_tex.c */ +bool nvc0_validate_tic(struct nvc0_context *nvc0, int s); +bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s); bool nve4_validate_tsc(struct nvc0_context *nvc0, int s); void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); @@ -331,11 +338,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe, void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); /* nve4_compute.c */ -void nve4_launch_grid(struct pipe_context *, - const uint *, const uint *, uint32_t, const void *); +void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *); /* nvc0_compute.c */ -void nvc0_launch_grid(struct pipe_context *, - const uint *, const uint *, uint32_t, const void *); +void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 49e176cbd49..eeacc714f3e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -35,4 +35,6 @@ #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 +#define NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT 0x00003860 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 93f211bd5fc..bc884d6c08f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -544,6 +544,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.texBindBase = NVE4_CP_INPUT_TEX(0); info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); 
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + } else { + info->io.resInfoCBSlot = 15; + info->io.suInfoBase = 512; } info->io.msInfoCBSlot = 0; info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 721857edecc..f5f9bb39fd9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -846,15 +846,15 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) /* configure and reset the counter(s) */ if (d == 0) - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_A_SIGSEL(c & 3)), 1); else - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_B_SIGSEL(c & 3)), 1); PUSH_DATA (push, cfg->ctr[i].sig_sel); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_SRCSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -917,13 +917,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) mask_sel &= cfg->ctr[i].src_mask; /* configure and reset the counter(s) */ - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SIGSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].sig_sel); - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SRCSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].src_sel | mask_sel); - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(c)), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); - 
BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nouveau_pushbuf *push = nvc0->base.pushbuf; const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); + struct pipe_grid_info info = {}; uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 }; - unsigned c; + unsigned c, i; if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); @@ -965,9 +966,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) for (c = 0; c < 8; ++c) if (screen->pm.mp_counter[c]) { if (is_nve4) { - IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0); + IMMED_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 0); } else { - IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0); + IMMED_NVC0(push, NVC0_CP(MP_PM_OP(c)), 0); } } /* release counters for this query */ @@ -983,13 +984,20 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) hq->bo); PUSH_SPACE(push, 1); - IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); + IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0); pipe->bind_compute_state(pipe, screen->pm.prog); input[0] = (hq->bo->offset + hq->base_offset); input[1] = (hq->bo->offset + hq->base_offset) >> 32; input[2] = hq->sequence; - pipe->launch_grid(pipe, block, grid, 0, input); + + for (i = 0; i < 3; i++) { + info.block[i] = block[i]; + info.grid[i] = grid[i]; + } + info.pc = 0; + info.input = input; + pipe->launch_grid(pipe, &info); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); @@ -1010,9 +1018,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) break; mask |= 1 << hsq->ctr[i]; if (is_nve4) { - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(hsq->ctr[i])), 1); + 
BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(hsq->ctr[i])), 1); } else { - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(hsq->ctr[i])), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(hsq->ctr[i])), 1); } PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index d368fda707d..998e9ea47ef 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -36,6 +36,7 @@ #include "nvc0/nvc0_screen.h" #include "nvc0/mme/com9097.mme.h" +#include "nvc0/mme/com90c0.mme.h" static boolean nvc0_screen_is_format_supported(struct pipe_screen *pscreen, @@ -61,7 +62,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_SHARED); - return (nvc0_format_table[format].usage & bindings) == bindings; + return (( nvc0_format_table[format].usage | + nvc0_vertex_format[format].usage) & bindings) == bindings; } static int @@ -196,6 +198,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: + if (debug_get_bool_option("NVF0_COMPUTE", false)) + return 1; return (class_3d <= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 
1 : 0; @@ -262,8 +266,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 0; break; case PIPE_SHADER_COMPUTE: - if (class_3d > NVE4_3D_CLASS) - return 0; + if (!debug_get_bool_option("NVF0_COMPUTE", false)) + if (class_3d > NVE4_3D_CLASS) + return 0; break; default: return 0; @@ -272,6 +277,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + if (class_3d >= NVE4_3D_CLASS) + return 0; + return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: @@ -336,6 +345,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; /* XXX not sure if more are really safe */ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -598,6 +609,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0xf0: case 0x100: case 0x110: + if (debug_get_bool_option("NVF0_COMPUTE", false)) + return nve4_screen_compute_setup(screen, screen->base.pushbuf); + case 0x120: return 0; default: return -1; @@ -660,6 +674,7 @@ nvc0_screen_create(struct nouveau_device *dev) case 0xf0: case 0x100: case 0x110: + case 0x120: break; default: return NULL; @@ -728,6 +743,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->nvsw->handle); switch (dev->chipset & ~0xf) { + case 0x120: case 0x110: case 0x100: case 0xf0: @@ -779,6 +795,9 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->fence.bo->offset + 16); switch (dev->chipset & ~0xf) { + case 0x120: + obj_class = GM200_3D_CLASS; + break; case 0x110: obj_class = GM107_3D_CLASS; break; @@ -860,8 +879,7 @@ nvc0_screen_create(struct nouveau_device *dev) BEGIN_NVC0(push, 
NVC0_3D(SHADE_MODEL), 1); PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); if (screen->eng3d->oclass < NVE4_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); - PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0); } else { BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); PUSH_DATA (push, 15); @@ -887,7 +905,7 @@ nvc0_screen_create(struct nouveau_device *dev) */ nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); - ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL, &screen->uniform_bo); if (ret) goto fail; @@ -899,8 +917,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* auxiliary constants (6 user clip planes, base instance id) */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { @@ -920,8 +938,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 256); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); PUSH_DATA (push, 0); PUSH_DATAf(push, 0.0f); @@ -929,8 +947,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NVC0(push, 
NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); if (screen->base.drm->version >= 0x01000101) { ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); @@ -988,6 +1006,14 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + if (screen->eng3d->oclass >= GM107_3D_CLASS) { + screen->tic.maxwell = true; + if (screen->eng3d->oclass == GM107_3D_CLASS) { + screen->tic.maxwell = + debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true); + IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell); + } + } BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); @@ -1051,6 +1077,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); + MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 1a56177815c..8487abcf999 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -51,8 +51,9 @@ struct nvc0_graph_state { uint8_t c14_bound; /* whether immediate array constbuf is bound */ uint8_t clip_enable; uint32_t clip_mode; - uint32_t uniform_buffer_bound[5]; + uint32_t uniform_buffer_bound[6]; struct nvc0_transform_feedback_state *tfb; + bool 
seamless_cube_map; }; struct nvc0_screen { @@ -83,6 +84,7 @@ struct nvc0_screen { void **entries; int next; uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; + bool maxwell; } tic; struct { @@ -164,12 +166,27 @@ nvc0_resource_validate(struct nv04_resource *res, uint32_t flags) struct nvc0_format { uint32_t rt; - uint32_t tic; + struct { + unsigned format:7; + unsigned type_r:3; + unsigned type_g:3; + unsigned type_b:3; + unsigned type_a:3; + unsigned src_x:3; + unsigned src_y:3; + unsigned src_z:3; + unsigned src_w:3; + } tic; + uint32_t usage; +}; + +struct nvc0_vertex_format { uint32_t vtx; uint32_t usage; }; extern const struct nvc0_format nvc0_format_table[]; +extern const struct nvc0_vertex_format nvc0_vertex_format[]; static inline void nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 382a18ef153..2f46c436a4c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -37,11 +37,11 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0, if (prog && prog->need_tls) { const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR; if (!nvc0->state.tls_required) - BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls); nvc0->state.tls_required |= 1 << stage; } else { if (nvc0->state.tls_required == (1 << stage)) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS); nvc0->state.tls_required &= ~(1 << stage); } @@ -152,7 +152,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) NVC0_3D_SHADE_MODEL_SMOOTH); } - if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) { + if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) { return; } @@ -292,9 +292,9 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) 
} nvc0->state.tfb = tfb; - if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) + if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS)) return; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TFB); for (b = 0; b < nvc0->num_tfbbufs; ++b) { struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); @@ -310,7 +310,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) buf = nv04_resource(targ->pipe.buffer); - BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR); if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index cf3d3497c78..7ccce9ff6bf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -21,6 +21,7 @@ */ #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -32,7 +33,6 @@ #include "nvc0/nvc0_query_hw.h" #include "nvc0/nvc0_3d.xml.h" -#include "nv50/nv50_texture.xml.h" #include "nouveau_gldefs.h" @@ -186,7 +186,7 @@ nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->blend = hwcso; - nvc0->dirty |= NVC0_NEW_BLEND; + nvc0->dirty_3d |= NVC0_NEW_3D_BLEND; } static void @@ -315,7 +315,7 @@ nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->rast = hwcso; - nvc0->dirty |= NVC0_NEW_RASTERIZER; + nvc0->dirty_3d |= NVC0_NEW_3D_RASTERIZER; } static void @@ -393,7 +393,7 @@ nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->zsa = hwcso; - nvc0->dirty |= NVC0_NEW_ZSA; + nvc0->dirty_3d |= NVC0_NEW_3D_ZSA; } static void @@ -449,7 +449,7 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, nvc0->num_samplers[s] = nr; - nvc0->dirty |= 
NVC0_NEW_SAMPLERS; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; } static void @@ -566,7 +566,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, } if (old) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); nvc0_screen_tic_unlock(nvc0->screen, old); } @@ -576,7 +576,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, for (i = nr; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); if (old) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); nvc0_screen_tic_unlock(nvc0->screen, old); pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); } @@ -584,7 +584,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0->num_textures[s] = nr; - nvc0->dirty |= NVC0_NEW_TEXTURES; + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } static void @@ -594,7 +594,7 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s, { struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d; const unsigned end = start + nr; - const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0); + const unsigned bin = (s == 5) ? 
NVC0_BIND_CP_TEX(0) : NVC0_BIND_3D_TEX(s, 0); int last_valid = -1; unsigned i; @@ -733,7 +733,7 @@ nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->vertprog = hwcso; - nvc0->dirty |= NVC0_NEW_VERTPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_VERTPROG; } static void * @@ -749,7 +749,7 @@ nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->fragprog = hwcso; - nvc0->dirty |= NVC0_NEW_FRAGPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAGPROG; } static void * @@ -765,7 +765,7 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->gmtyprog = hwcso; - nvc0->dirty |= NVC0_NEW_GMTYPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_GMTYPROG; } static void * @@ -781,7 +781,7 @@ nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->tctlprog = hwcso; - nvc0->dirty |= NVC0_NEW_TCTLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG; } static void * @@ -797,7 +797,7 @@ nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->tevlprog = hwcso; - nvc0->dirty |= NVC0_NEW_TEVLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TEVLPROG; } static void * @@ -839,7 +839,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const unsigned i = index; if (unlikely(shader == PIPE_SHADER_COMPUTE)) { - assert(!cb || !cb->user_buffer); + if (nvc0->constbuf[s][i].user) + nvc0->constbuf[s][i].u.buf = NULL; + else if (nvc0->constbuf[s][i].u.buf) nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); @@ -849,9 +851,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nvc0->constbuf[s][i].u.buf = NULL; else if (nvc0->constbuf[s][i].u.buf) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i)); - nvc0->dirty |= 
NVC0_NEW_CONSTBUF; + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; } nvc0->constbuf_dirty[s] |= 1 << i; @@ -891,7 +893,7 @@ nvc0_set_blend_color(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->blend_colour = *bcol; - nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; + nvc0->dirty_3d |= NVC0_NEW_3D_BLEND_COLOUR; } static void @@ -901,7 +903,7 @@ nvc0_set_stencil_ref(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->stencil_ref = *sr; - nvc0->dirty |= NVC0_NEW_STENCIL_REF; + nvc0->dirty_3d |= NVC0_NEW_3D_STENCIL_REF; } static void @@ -912,7 +914,7 @@ nvc0_set_clip_state(struct pipe_context *pipe, memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp)); - nvc0->dirty |= NVC0_NEW_CLIP; + nvc0->dirty_3d |= NVC0_NEW_3D_CLIP; } static void @@ -921,7 +923,7 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->sample_mask = sample_mask; - nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_MASK; } static void @@ -931,7 +933,7 @@ nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples) if (nvc0->min_samples != min_samples) { nvc0->min_samples = min_samples; - nvc0->dirty |= NVC0_NEW_MIN_SAMPLES; + nvc0->dirty_3d |= NVC0_NEW_3D_MIN_SAMPLES; } } @@ -940,23 +942,12 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nvc0_context *nvc0 = nvc0_context(pipe); - unsigned i; - - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - for (i = 0; i < fb->nr_cbufs; ++i) - pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]); - for (; i < nvc0->framebuffer.nr_cbufs; ++i) - pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); - nvc0->framebuffer.nr_cbufs = fb->nr_cbufs; + util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->framebuffer.width = fb->width; - nvc0->framebuffer.height = fb->height; - - 
pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf); - - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -966,7 +957,7 @@ nvc0_set_polygon_stipple(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->stipple = *stipple; - nvc0->dirty |= NVC0_NEW_STIPPLE; + nvc0->dirty_3d |= NVC0_NEW_3D_STIPPLE; } static void @@ -984,7 +975,7 @@ nvc0_set_scissor_states(struct pipe_context *pipe, continue; nvc0->scissors[start_slot + i] = scissor[i]; nvc0->scissors_dirty |= 1 << (start_slot + i); - nvc0->dirty |= NVC0_NEW_SCISSOR; + nvc0->dirty_3d |= NVC0_NEW_3D_SCISSOR; } } @@ -1003,7 +994,7 @@ nvc0_set_viewport_states(struct pipe_context *pipe, continue; nvc0->viewports[start_slot + i] = vpt[i]; nvc0->viewports_dirty |= 1 << (start_slot + i); - nvc0->dirty |= NVC0_NEW_VIEWPORT; + nvc0->dirty_3d |= NVC0_NEW_3D_VIEWPORT; } } @@ -1017,7 +1008,7 @@ nvc0_set_tess_state(struct pipe_context *pipe, memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float)); memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float)); - nvc0->dirty |= NVC0_NEW_TESSFACTOR; + nvc0->dirty_3d |= NVC0_NEW_3D_TESSFACTOR; } static void @@ -1028,8 +1019,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); unsigned i; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); - nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb, start_slot, count); @@ -1071,20 +1062,20 @@ nvc0_set_index_buffer(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); if (nvc0->idxbuf.buffer) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); if (ib) { pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); nvc0->idxbuf.index_size = ib->index_size; 
if (ib->buffer) { nvc0->idxbuf.offset = ib->offset; - nvc0->dirty |= NVC0_NEW_IDXBUF; + nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF; } else { nvc0->idxbuf.user_buffer = ib->user_buffer; - nvc0->dirty &= ~NVC0_NEW_IDXBUF; + nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; } } else { - nvc0->dirty &= ~NVC0_NEW_IDXBUF; + nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); } } @@ -1095,7 +1086,7 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->vertex = hwcso; - nvc0->dirty |= NVC0_NEW_VERTEX; + nvc0->dirty_3d |= NVC0_NEW_3D_VERTEX; } static struct pipe_stream_output_target * @@ -1194,7 +1185,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe, nvc0->num_tfbbufs = num_targets; if (nvc0->tfbbuf_dirty) - nvc0->dirty |= NVC0_NEW_TFB_TARGETS; + nvc0->dirty_3d |= NVC0_NEW_3D_TFB_TARGETS; } static void @@ -1223,7 +1214,7 @@ nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t, nvc0->surfaces_dirty[t] |= mask; if (t == 0) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF); else nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); } @@ -1241,7 +1232,7 @@ nvc0_set_compute_resources(struct pipe_context *pipe, static void nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader, unsigned start_slot, unsigned count, - struct pipe_image_view **views) + struct pipe_image_view *views) { } @@ -1254,7 +1245,7 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t, const unsigned mask = ((1 << nr) - 1) << start; unsigned i; - assert(t < 5); + assert(t < 6); if (pbuffers) { for (i = start; i < end; ++i) { @@ -1274,7 +1265,11 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t, } nvc0->buffers_dirty[t] |= mask; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); + if (t == 5) + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF); + else + 
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF); + } static void @@ -1286,7 +1281,10 @@ nvc0_set_shader_buffers(struct pipe_context *pipe, const unsigned s = nvc0_shader_stage(shader); nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers); - nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS; + if (s == 5) + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_BUFFERS; + else + nvc0_context(pipe)->dirty_3d |= NVC0_NEW_3D_BUFFERS; } static inline void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index e0d8ab01776..18e79e36b85 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -3,7 +3,6 @@ #include "util/u_math.h" #include "nvc0/nvc0_context.h" -#include "nv50/nv50_defs.xml.h" #if 0 static void @@ -77,7 +76,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; bool serialize = false; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); @@ -142,7 +141,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* only register for writing, otherwise we'd always serialize here */ - BCTX_REFN(nvc0->bufctx_3d, FB, res, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR); } if (fb->zsbuf) { @@ -173,7 +172,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR); } else { BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -184,8 +183,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) ms = 1 << ms_mode; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - 
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); PUSH_DATA (push, 256 + 128); for (i = 0; i < ms; i++) { @@ -240,7 +239,7 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) int i; struct nouveau_pushbuf *push = nvc0->base.pushbuf; - if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) && nvc0->rast->pipe.scissor == nvc0->state.scissor) return; @@ -318,8 +317,8 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10)); - PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10)); + PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); PUSH_DATA (push, 256); PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); @@ -368,7 +367,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) nvc0_check_program_ucps(nvc0, vp, clip_enable); - if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) + if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage))) if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) nvc0_upload_uclip_planes(nvc0, stage); @@ -455,7 +454,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (i << 4) | 1); - BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD); nvc0->cb_dirty = 1; /* Force cache flush for UBO. 
*/ res->cb_bindings[s] |= 1 << i; @@ -468,6 +467,11 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } } } + + /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5]; + nvc0->state.uniform_buffer_bound[5] = 0; } static void @@ -479,8 +483,8 @@ nvc0_validate_buffers(struct nvc0_context *nvc0) for (s = 0; s < 5; s++) { BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); PUSH_DATA (push, 512); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { @@ -491,7 +495,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0) PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); PUSH_DATA (push, 0); - BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR); + BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR); } else { PUSH_DATA (push, 0); PUSH_DATA (push, 0); @@ -536,6 +540,25 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples); } +static void +nvc0_validate_driverconst(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + int i; + + for (i = 0; i < 5; ++i) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); + PUSH_DATA (push, (15 << 4) | 1); + } + + nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; +} + void 
nvc0_validate_global_residents(struct nvc0_context *nvc0, struct nouveau_bufctx *bctx, int bin) @@ -629,35 +652,37 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) else ctx_to->state = ctx_to->screen->save_state; - ctx_to->dirty = ~0; + ctx_to->dirty_3d = ~0; + ctx_to->dirty_cp = ~0; ctx_to->viewports_dirty = ~0; ctx_to->scissors_dirty = ~0; - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { ctx_to->samplers_dirty[s] = ~0; ctx_to->textures_dirty[s] = ~0; ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1; + ctx_to->buffers_dirty[s] = ~0; } /* Reset tfb as the shader that owns it may have been deleted. */ ctx_to->state.tfb = NULL; if (!ctx_to->vertex) - ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS); if (!ctx_to->idxbuf.buffer) - ctx_to->dirty &= ~NVC0_NEW_IDXBUF; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; if (!ctx_to->vertprog) - ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG; if (!ctx_to->fragprog) - ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG; if (!ctx_to->blend) - ctx_to->dirty &= ~NVC0_NEW_BLEND; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND; if (!ctx_to->rast) - ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR); + ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR); if (!ctx_to->zsa) - ctx_to->dirty &= ~NVC0_NEW_ZSA; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA; ctx_to->screen->cur_ctx = ctx_to; } @@ -666,40 +691,41 @@ static struct state_validate { void (*func)(struct nvc0_context *); uint32_t states; } validate_list[] = { - { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_blend, NVC0_NEW_BLEND }, - { nvc0_validate_zsa, NVC0_NEW_ZSA }, - { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK }, - { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, - { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, - { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, - { 
nvc0_validate_stipple, NVC0_NEW_STIPPLE }, - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, - { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, - { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, - { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, - { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, - { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR }, - { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, - { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER }, - { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | - NVC0_NEW_RASTERIZER }, - { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | - NVC0_NEW_VERTPROG | - NVC0_NEW_TEVLPROG | - NVC0_NEW_GMTYPROG }, - { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, - { nvc0_validate_textures, NVC0_NEW_TEXTURES }, - { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, - { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, - { nvc0_validate_buffers, NVC0_NEW_BUFFERS }, - { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, - { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }, - { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES }, + { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_3D_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_3D_ZSA }, + { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK }, + { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE }, + { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT }, + { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG }, + { 
nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG }, + { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG }, + { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR }, + { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_derived_1, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA | + NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_derived_2, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_derived_3, NVC0_NEW_3D_BLEND | NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER | + NVC0_NEW_3D_VERTPROG | + NVC0_NEW_3D_TEVLPROG | + NVC0_NEW_3D_GMTYPROG }, + { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS }, + { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS }, + { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES }, + { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS }, + { nvc0_idxbuf_validate, NVC0_NEW_3D_IDXBUF }, + { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG }, + { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES }, + { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, }; bool @@ -712,7 +738,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask) if (nvc0->screen->cur_ctx != nvc0) nvc0_switch_pipe_context(nvc0); - state_mask = nvc0->dirty & mask; + state_mask = nvc0->dirty_3d & mask; if (state_mask) { for (i = 0; i < ARRAY_SIZE(validate_list); ++i) { @@ -721,7 +747,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask) if (state_mask & validate->states) validate->func(nvc0); } - nvc0->dirty &= ~state_mask; + nvc0->dirty_3d &= ~state_mask; nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 
e3843ca1bf1..49577969d3d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -34,8 +34,8 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" -#include "nv50/nv50_defs.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_defs.xml.h" +#include "nv50/g80_texture.xml.h" /* these are used in nv50_blit.h */ #define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL @@ -54,7 +54,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */ if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal) - return NV50_SURFACE_FORMAT_A8_UNORM; + return G80_SURFACE_FORMAT_A8_UNORM; /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. @@ -65,15 +65,15 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) switch (util_format_get_blocksize(format)) { case 1: - return NV50_SURFACE_FORMAT_R8_UNORM; + return G80_SURFACE_FORMAT_R8_UNORM; case 2: - return NV50_SURFACE_FORMAT_RG8_UNORM; + return G80_SURFACE_FORMAT_RG8_UNORM; case 4: - return NV50_SURFACE_FORMAT_BGRA8_UNORM; + return G80_SURFACE_FORMAT_BGRA8_UNORM; case 8: - return NV50_SURFACE_FORMAT_RGBA16_UNORM; + return G80_SURFACE_FORMAT_RGBA16_UNORM; case 16: - return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + return G80_SURFACE_FORMAT_RGBA32_FLOAT; default: assert(0); return 0; @@ -353,7 +353,7 @@ nvc0_clear_render_target(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -609,7 +609,7 @@ nvc0_clear_buffer(struct pipe_context *pipe, data, data_size); } - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -678,7 +678,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, IMMED_NVC0(push, 
NVC0_3D(COND_MODE), nvc0->cond_condmode); - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } void @@ -693,7 +693,7 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers, uint32_t mode = 0; /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ - if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER)) + if (!nvc0_state_validate(nvc0, NVC0_NEW_3D_FRAMEBUFFER)) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -793,7 +793,7 @@ struct nvc0_blitctx struct pipe_sampler_view *texture[2]; struct nv50_tsc_entry *sampler[2]; unsigned min_samples; - uint32_t dirty; + uint32_t dirty_3d; } saved; struct nvc0_rasterizer_stateobj rast; }; @@ -871,12 +871,14 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) blit->sampler[0].id = -1; - blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT); blit->sampler[0].tsc[1] = - NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_NEAREST | + G80_TSC_1_MIN_FILTER_NEAREST | + G80_TSC_1_MIP_FILTER_NONE; /* clamp to edge, min/max lod = 0, bilinear filtering */ @@ -884,7 +886,9 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; blit->sampler[1].tsc[1] = - NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_LINEAR | + G80_TSC_1_MIN_FILTER_LINEAR | + G80_TSC_1_MIP_FILTER_NONE; } static void @@ -1081,19 +1085,19 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) nvc0->min_samples = 1; - ctx->saved.dirty = nvc0->dirty; 
+ ctx->saved.dirty_3d = nvc0->dirty_3d; nvc0->textures_dirty[4] |= 3; nvc0->samplers_dirty[4] |= 3; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); - nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES | - NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | - NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | - NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS; + nvc0->dirty_3d = NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_MIN_SAMPLES | + NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG | + NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG | + NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS; } static void @@ -1141,20 +1145,20 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query, nvc0->cond_cond, nvc0->cond_mode); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); nouveau_scratch_done(&nvc0->base); - nvc0->dirty = blit->saved.dirty | - (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK | - NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND | - NVC0_NEW_VIEWPORT | - NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS | - NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | - NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | - NVC0_NEW_TFB_TARGETS | NVC0_NEW_VERTEX | 
NVC0_NEW_ARRAYS); + nvc0->dirty_3d = blit->saved.dirty_3d | + (NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_SAMPLE_MASK | + NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_BLEND | + NVC0_NEW_3D_VIEWPORT | + NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS | + NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG | + NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG | + NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS); nvc0->scissors_dirty |= 1; nvc0->viewports_dirty |= 1; @@ -1263,7 +1267,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) return; } - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, + NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); nouveau_pushbuf_validate(push); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 7223f5aecfb..53332400a4f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -22,35 +22,29 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nvc0/gm107_texture.xml.h" +#include "nvc0/nvc0_compute.xml.h" +#include "nv50/g80_texture.xml.h" +#include "nv50/g80_defs.xml.h" #include "util/u_format.h" #define NVE4_TIC_ENTRY_INVALID 0x000fffff #define NVE4_TSC_ENTRY_INVALID 0xfff00000 -#define NV50_TIC_0_SWIZZLE__MASK \ - (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ - NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) - static inline uint32_t -nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int) +nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int) { switch (swz) { - case PIPE_SWIZZLE_RED: - return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; - case PIPE_SWIZZLE_GREEN: - return (tc & 
NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; - case PIPE_SWIZZLE_BLUE: - return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; - case PIPE_SWIZZLE_ALPHA: - return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_RED : return fmt->tic.src_x; + case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y; + case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z; + case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w; case PIPE_SWIZZLE_ONE: - return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT; case PIPE_SWIZZLE_ZERO: default: - return NV50_TIC_MAP_ZERO; + return G80_TIC_SOURCE_ZERO; } } @@ -67,14 +61,15 @@ nvc0_create_sampler_view(struct pipe_context *pipe, return nvc0_create_texture_view(pipe, res, templ, flags, templ->target); } -struct pipe_sampler_view * -nvc0_create_texture_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ, - uint32_t flags, - enum pipe_texture_target target) +static struct pipe_sampler_view * +gm107_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) { const struct util_format_description *desc; + const struct nvc0_format *fmt; uint64_t address; uint32_t *tic; uint32_t swz[4]; @@ -101,45 +96,224 @@ nvc0_create_texture_view(struct pipe_context *pipe, tic = &view->tic[0]; desc = util_format_description(view->pipe.format); + tex_int = util_format_is_pure_integer(view->pipe.format); + + fmt = &nvc0_format_table[view->pipe.format]; + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + + tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT; + tic[0] |= fmt->tic.type_r 
<< GM107_TIC2_0_R_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT; + tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT; + tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT; + tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT; + tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT; + + address = mt->base.address; + + tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2; + tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V; + tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[4] |= GM107_TIC2_4_SRGB_CONVERSION; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[5] = GM107_TIC2_5_NORMALIZED_COORDS; + else + tic[5] = 0; + + /* check for linear storage type */ + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (texture->target == PIPE_BUFFER) { + assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS)); + width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element; + address += + view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER; + tic[3] |= width >> 16; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER; + tic[4] |= width & 0xffff; + } else { + assert(!(mt->level[0].pitch & 0x1f)); + /* must be 2D texture without mip maps */ + tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP; + tic[3] |= mt->level[0].pitch >> 5; + tic[4] |= mt->base.base.width0 - 1; + tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT; + tic[5] |= mt->base.base.height0 - 1; + } + tic[1] = address; + tic[2] |= address >> 32; + tic[6] = 0; + tic[7] = 0; + return &view->pipe; + } + + tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR; + tic[3] |= + ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) | + ((mt->level[0].tile_mode & 0xf00) >> 8 << 6); + + depth = 
MAX2(mt->base.base.array_size, mt->base.base.depth0); - tic[0] = nvc0_format_table[view->pipe.format].tic; + if (mt->base.base.array_size > 1) { + /* there doesn't seem to be a base layer field in TIC */ + address += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } + tic[1] = address; + tic[2] |= address >> 32; + + switch (target) { + case PIPE_TEXTURE_1D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D; + break; + case PIPE_TEXTURE_2D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D; + break; + case PIPE_TEXTURE_RECT: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D; + break; + case PIPE_TEXTURE_3D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY; + break; + default: + unreachable("unexpected/invalid texture target"); + } + + tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ? + GM107_TIC2_3_USE_HEADER_OPT_CONTROL : + GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH | + GM107_TIC2_3_LOD_ISO_QUALITY_HIGH; + + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) { + width = mt->base.base.width0 << mt->ms_x; + height = mt->base.base.height0 << mt->ms_y; + } else { + width = mt->base.base.width0; + height = mt->base.base.height0; + } + + tic[4] |= width - 1; + + tic[5] |= (height - 1) & 0xffff; + tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT; + tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT; + + /* sampling points: (?) 
*/ + if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) { + tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO; + tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1; + } else { + tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO; + tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE; + } + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT; + + return &view->pipe; +} + +static struct pipe_sampler_view * +gf100_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + const struct nvc0_format *fmt; + uint64_t address; + uint32_t *tic; + uint32_t swz[4]; + uint32_t width, height; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt; + bool tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + mt = nv50_miptree(texture); + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + fmt = &nvc0_format_table[view->pipe.format]; tex_int = util_format_is_pure_integer(view->pipe.format); - swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); - swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); - swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); - swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); - tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | - (swz[0] << NV50_TIC_0_MAPR__SHIFT) | - (swz[1] << NV50_TIC_0_MAPG__SHIFT) | - (swz[2] << NV50_TIC_0_MAPB__SHIFT) | - (swz[3] << NV50_TIC_0_MAPA__SHIFT); + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = 
nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) | + (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) | + (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) | + (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) | + (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) | + (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) | + (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) | + (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) | + (swz[3] << G80_TIC_0_W_SOURCE__SHIFT); address = mt->base.address; - tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + tic[2] |= G80_TIC_2_SRGB_CONVERSION; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) - tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + tic[2] |= G80_TIC_2_NORMALIZED_COORDS; /* check for linear storage type */ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (texture->target == PIPE_BUFFER) { - assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)); + assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)); address += view->pipe.u.buf.first_element * desc->block.bits / 8; - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { /* must be 2D texture without mip maps */ - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | mt->base.base.height0; @@ -167,30 +341,30 @@ nvc0_create_texture_view(struct pipe_context *pipe, switch (target) { case PIPE_TEXTURE_1D: - tic[2] |= 
NV50_TIC_2_TARGET_1D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D; break; case PIPE_TEXTURE_2D: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_RECT: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_3D: - tic[2] |= NV50_TIC_2_TARGET_3D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D; break; case PIPE_TEXTURE_CUBE: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP; break; case PIPE_TEXTURE_1D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY; break; default: unreachable("unexpected/invalid texture target"); @@ -224,6 +398,18 @@ nvc0_create_texture_view(struct pipe_context *pipe, return &view->pipe; } +struct pipe_sampler_view * +nvc0_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + if (nvc0_context(pipe)->screen->tic.maxwell) + return gm107_create_texture_view(pipe, texture, templ, flags, target); + return gf100_create_texture_view(pipe, texture, templ, flags, target); +} + static void nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic, struct nv04_resource *res) @@ -244,7 +430,7 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic, tic->tic[2] |= address >> 32; } -static bool +bool nvc0_validate_tic(struct nvc0_context *nvc0, int s) { uint32_t commands[32]; @@ -285,7 +471,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) need_flush = true; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { - BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + if 
(unlikely(s == 5)) + BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1); + else + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, (tic->id << 4) | 1); NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); } @@ -298,7 +487,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) continue; commands[n++] = (tic->id << 9) | (i << 1) | 1; - BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + if (unlikely(s == 5)) + BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); + else + BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) commands[n++] = (i << 1) | 0; @@ -306,7 +498,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) nvc0->state.num_textures[s] = nvc0->num_textures[s]; if (n) { - BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); + if (unlikely(s == 5)) + BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n); + else + BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); PUSH_DATAp(push, commands, n); } nvc0->textures_dirty[s] = 0; @@ -362,7 +557,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; nvc0->tex_handles[s][i] |= tic->id; if (dirty) - BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) { nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; @@ -392,7 +587,7 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) } } -static bool +bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s) { uint32_t commands[16]; @@ -410,6 +605,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) commands[n++] = (i << 4) | 0; continue; } + nvc0->seamless_cube_map = tsc->seamless_cube_map; if (tsc->id < 0) { tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); @@ -428,7 +624,10 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; if (n) { - BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); + if (unlikely(s == 5)) + BEGIN_NIC0(push, 
NVC0_CP(BIND_TSC), n); + else + BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); PUSH_DATAp(push, commands, n); } nvc0->samplers_dirty[s] = 0; @@ -513,7 +712,7 @@ nve4_set_tex_handles(struct nvc0_context *nvc0) if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) return; - address = nvc0->screen->uniform_bo->offset + (5 << 16); + address = nvc0->screen->uniform_bo->offset + (6 << 16); for (s = 0; s < 5; ++s, address += (1 << 10)) { uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; @@ -686,45 +885,45 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0) static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = { - [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT, - [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT, - [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT, - [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT, - [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM, - [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM, - [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT, - [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT, - [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM, - [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM, - [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT, - [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT, - [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT, - [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM, -/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */ - [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT, - [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT, - [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT, - [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT, - [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM, - [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM, 
- [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT, - [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT, - [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM, - [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM, - [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT, - [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT, - [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT, - [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT, - [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT, - [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT, - [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM, - [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM, - [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT, - [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT, - [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM, - [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM, - [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT, - [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT, + [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT, + [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT, + [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 
GK104_IMAGE_FORMAT_RGB10_A2_UINT, */ + [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT, + [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT, + [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT, + [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT, + [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT, + [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT, + [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT, }; /* Auxiliary format description values for surface instructions. 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c index 279c7e93cc8..24d23d29bbf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -3,8 +3,6 @@ #include "nvc0/nvc0_context.h" -#include "nv50/nv50_defs.xml.h" - struct nvc0_transfer { struct pipe_transfer base; struct nv50_m2mf_rect rect[2]; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 032b3c125cf..647aa10ec35 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -80,7 +80,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; - so->element[i].state = nvc0_format_table[fmt].vtx; + so->element[i].state = nvc0_vertex_format[fmt].vtx; if (!so->element[i].state) { switch (util_format_get_nr_components(fmt)) { @@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, FREE(so); return NULL; } - so->element[i].state = nvc0_format_table[fmt].vtx; + so->element[i].state = nvc0_vertex_format[fmt].vtx; so->need_conversion = true; pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, "Converting vertex element %d, no hw format %s", @@ -222,7 +222,7 @@ static inline void nvc0_release_user_vbufs(struct nvc0_context *nvc0) { if (nvc0->vbo_user) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); nouveau_scratch_done(&nvc0->base); } } @@ -257,7 +257,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer, base, size, &bo); if (bo) - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo); NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); } @@ -292,7 +292,7 @@ 
nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0) address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer, base, size, &bo); if (bo) - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo); BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); PUSH_DATA (push, b); @@ -368,7 +368,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) if (!(refd & (1 << b))) { refd |= 1 << b; - BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, res, RD); } } if (nvc0->vbo_user) @@ -412,7 +412,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) PUSH_DATAh(push, buf->address + limit); PUSH_DATA (push, buf->address + limit); - BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, buf, RD); } /* If there are more elements than buffers, we might not have unset * fetching on the later elements. @@ -435,7 +435,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) uint8_t vbo_mode; bool update_vertex; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); assert(vertex); if (unlikely(vertex->need_conversion) || @@ -446,7 +446,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) } const_vbos = vbo_mode ? 
0 : nvc0->constant_vbos; - update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) || + update_vertex = (nvc0->dirty_3d & NVC0_NEW_3D_VERTEX) || (const_vbos != nvc0->state.constant_vbos) || (vbo_mode != nvc0->state.vbo_mode); @@ -537,7 +537,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0) PUSH_DATA (push, buf->address + buf->base.width0 - 1); PUSH_DATA (push, nvc0->idxbuf.index_size >> 1); - BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD); } #define NVC0_PRIM_GL_CASE(n) \ @@ -833,8 +833,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) /* Queue things up to let the macros write params to the driver constbuf */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + BEGIN_NVC0(push, NVC0_3D(CB_POS), 1); + PUSH_DATA (push, 256 + 128); if (info->indexed) { assert(nvc0->idxbuf.buffer); @@ -947,12 +949,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); /* Check whether we want to switch vertex-submission mode. 
*/ - if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) { + if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) { if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) if (nvc0->state.vbo_mode != 3) - nvc0->dirty |= NVC0_NEW_ARRAYS; + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; - if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) { + if (!(nvc0->dirty_3d & NVC0_NEW_3D_ARRAYS) && nvc0->state.vbo_mode == 0) { if (nvc0->vertex->shared_slots) nvc0_update_user_vbufs_shared(nvc0); else @@ -973,8 +975,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_SPACE(push, 9); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); if (!info->indirect) { BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3); PUSH_DATA (push, 256 + 128); @@ -984,6 +986,14 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS && + nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) { + nvc0->state.seamless_cube_map = nvc0->seamless_cube_map; + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(TEX_MISC), + nvc0->seamless_cube_map ? 
NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0); + } + push->kick_notify = nvc0_draw_vbo_kick_notify; for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 9c19ba20a7e..20b6742d8d7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -225,7 +225,7 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) PUSH_DATAh(push, va + size - 1); PUSH_DATA (push, va + size - 1); - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); @@ -554,7 +554,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ctx.instance_id; } - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); nouveau_scratch_done(&nvc0->base); } while (inst_count); @@ -629,7 +629,7 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx, data = (uint32_t *)nouveau_scratch_get(&nvc0->base, info->count * index_size, &va, &bo); - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h index 79abe78b77a..4d07546c310 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h @@ -50,9 +50,9 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) #define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) -#define SUBC_COMPUTE(m) 1, (m) -#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) -#define 
NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) +#define SUBC_CP(m) 1, (m) +#define NVC0_CP(n) SUBC_CP(NVC0_COMPUTE_##n) +#define NVE4_CP(n) SUBC_CP(NVE4_COMPUTE_##n) #define SUBC_M2MF(m) 2, (m) #define SUBC_P2MF(m) 2, (m) diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index d3e5676873e..652bc6d83d6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -39,7 +39,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, { struct nouveau_device *dev = screen->base.device; struct nouveau_object *chan = screen->base.channel; - unsigned i; + int i; int ret; uint32_t obj_class; @@ -51,6 +51,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, case 0xe0: obj_class = NVE4_COMPUTE_CLASS; /* GK104 */ break; + case 0x110: + obj_class = GM107_COMPUTE_CLASS; + break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; @@ -68,21 +71,21 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, if (ret) return ret; - BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); - BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); /* No idea why there are 2. Divide size by 2 to be safe. * Actually this might be per-MP TEMP size and looks like I'm only using * 2 MPs instead of all 8. 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3); + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3); PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3); + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); @@ -92,52 +95,53 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be * accessible. We cannot prevent that at the moment, so expect failure. */ - BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1); + BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); PUSH_DATA (push, 1 << 24); - BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1); + BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); PUSH_DATA (push, 2 << 24); - BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1); + BEGIN_NVC0(push, SUBC_CP(0x0310), 1); PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 
0x400 : 0x300); /* NOTE: these do not affect the state used by the 3D object */ - BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3); + BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); - BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3); + BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); if (obj_class >= NVF0_COMPUTE_CLASS) { - BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1); - PUSH_DATA (push, 0x100); - BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63); - for (i = 63; i >= 1; --i) + /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1) + * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently + * disabled because our firmware doesn't support these commands and the + * GPU hangs if they are used. */ + BEGIN_NIC0(push, SUBC_CP(0x0248), 64); + for (i = 63; i >= 0; i--) PUSH_DATA(push, 0x38000 | i); - IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); - IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0); + IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0); } - BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1); + BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1); PUSH_DATA (push, 0); /* does not interefere with 3D */ - if (obj_class >= NVF0_COMPUTE_CLASS) - IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + if (obj_class == NVF0_COMPUTE_CLASS) + IMMED_NVC0(push, SUBC_CP(0x02c4), 1); /* MS sample coordinate offsets: these do not work with _ALT modes ! 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATA (push, 0); /* 0 */ PUSH_DATA (push, 0); @@ -157,13 +161,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATA (push, 1); #ifdef DEBUG - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 28); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8); PUSH_DATA (push, 1); PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); @@ -174,7 +178,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATA (push, 0); /* warp cfstack size */ #endif - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); return 0; @@ -201,13 +205,13 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) * NVE4's surface load/store instructions receive all the information * directly instead of via binding points, so we have to supply them. 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); nve4_set_surface_info(push, nvc0->surfaces[t][i], screen); @@ -223,7 +227,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) } } if (nvc0->surfaces_dirty[t]) { - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } @@ -252,7 +256,7 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0) { bool need_flush = nve4_validate_tsc(nvc0, 5); if (need_flush) { - BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1); + BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } } @@ -281,17 +285,17 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, address); PUSH_DATA (push, address); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, n * 4); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, &nvc0->tex_handles[s][i], n); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 
nvc0->textures_dirty[s] = 0; @@ -338,29 +342,29 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset); PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, cp->parm_size); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, input, cp->parm_size / 4); } - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 7 * 4); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, block_layout, 3); PUSH_DATAp(push, grid_layout, 3); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } @@ -429,10 +433,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, } void -nve4_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, - const void *input) +nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = 
nvc0->base.pushbuf; @@ -453,33 +454,34 @@ nve4_launch_grid(struct pipe_context *pipe, if (ret) goto out; - nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); + nve4_compute_setup_launch_desc(nvc0, desc, info->pc, + info->block, info->grid); #ifdef DEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) nve4_compute_dump_launch_desc(desc); #endif - nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); + nve4_compute_upload_input(nvc0, info->input, info->block, info->grid); /* upload descriptor and flush */ #if 0 - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, desc_gpuaddr); PUSH_DATA (push, desc_gpuaddr); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 256); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4)); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4)); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE); #endif - BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1); + BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); PUSH_DATA (push, desc_gpuaddr >> 8); - BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1); + BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); PUSH_DATA (push, 0x3); - BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); out: @@ -517,13 +519,13 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); PUSH_SPACE(push, 16); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, txc->offset + (tic->id * 
32)); PUSH_DATA (push, txc->offset + (tic->id * 32)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 32); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, &tic->tic[0], 8); @@ -546,11 +548,11 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; if (n[0]) { - BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]); + BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]); PUSH_DATAp(push, commands[0], n[0]); } if (n[1]) { - BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]); + BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]); PUSH_DATAp(push, commands[1], n[1]); } diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index 7364a68a579..84f8593b9b6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -2,7 +2,6 @@ #ifndef NVE4_COMPUTE_H #define NVE4_COMPUTE_H -#include "nv50/nv50_defs.xml.h" #include "nvc0/nve4_compute.xml.h" /* Input space is implemented as c0[], to which we bind the screen->parm bo. 
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a2b7f87855d..15a94d90721 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -319,11 +319,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; } break; case PIPE_SHADER_VERTEX: @@ -378,11 +381,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; } break; } diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 56c7fb93f73..997e5f0e383 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -553,25 +553,24 @@ void evergreen_emit_cs_shader( } static void evergreen_launch_grid( - struct pipe_context *ctx_, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) + struct pipe_context *ctx_, const struct pipe_grid_info *info) { struct r600_context *ctx = (struct r600_context *)ctx_; #ifdef HAVE_OPENCL struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; boolean use_kill; - ctx->cs_shader_state.pc = pc; + ctx->cs_shader_state.pc = info->pc; /* Get the config information for this kernel. 
*/ - r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill); + r600_shader_binary_read_config(&shader->binary, &shader->bc, + info->pc, &use_kill); #endif - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); - compute_emit_cs(ctx, block_layout, grid_layout); + evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input); + compute_emit_cs(ctx, info->block, info->grid); } static void evergreen_set_compute_resources(struct pipe_context * ctx_, diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 0fe7c74418d..7eab29c6eb4 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -784,12 +784,12 @@ LLVMModuleRef r600_tgsi_llvm( { struct tgsi_shader_info shader_info; struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; - radeon_llvm_context_init(ctx); + radeon_llvm_context_init(ctx, "r600--"); LLVMTypeRef Arguments[32]; unsigned ArgumentsCount = 0; for (unsigned i = 0; i < ctx->inputs_count; i++) Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4); - radeon_llvm_create_func(ctx, Arguments, ArgumentsCount); + radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount); for (unsigned i = 0; i < ctx->inputs_count; i++) { LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); LLVMAddAttribute(P, LLVMInRegAttribute); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c8580d807d7..7fb4108a188 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -532,6 +532,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } else { return PIPE_SHADER_IR_TGSI; } + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: if (rscreen->b.family == 
CHIP_CYPRESS || rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA) @@ -541,6 +543,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: /* due to a bug in the shader compiler, some loops hang diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 324d2719f44..ea028272ccd 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -411,6 +411,7 @@ static const struct debug_named_value common_debug_options[] = { { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." 
}, + { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index e92df876c22..7df617737a7 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -89,6 +89,7 @@ #define DBG_NO_DCC_CLEAR (1llu << 44) #define DBG_NO_RB_PLUS (1llu << 45) #define DBG_SI_SCHED (1llu << 46) +#define DBG_MONOLITHIC_SHADERS (1llu << 47) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -96,7 +97,7 @@ struct r600_common_context; struct r600_perfcounters; struct radeon_shader_reloc { - char *name; + char name[32]; uint64_t offset; }; diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index af206e43860..1df0c300e85 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1293,6 +1293,7 @@ unsigned r600_translate_colorswap(enum pipe_format format) break; case 4: /* check the middle channels, the 1st and 4th channel can be NONE */ +#ifdef PIPE_ARCH_LITTLE_ENDIAN if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) return V_0280A0_SWAP_STD; /* XYZW */ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) @@ -1301,6 +1302,16 @@ unsigned r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT; /* ZYXW */ else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y)) return V_0280A0_SWAP_ALT_REV; /* WXYZ */ +#else + if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,X)) + return V_0280A0_SWAP_STD; /* ZWXY */ + else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,W)) + return V_0280A0_SWAP_STD_REV; /* YXWZ */ + else if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,Z)) + return V_0280A0_SWAP_ALT; /* XWZY */ + else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) + return V_0280A0_SWAP_ALT_REV; /* YZWX */ +#endif break; } return ~0U; diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c index 2e45d439e7a..8aaa85d02f6 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.c +++ b/src/gallium/drivers/radeon/radeon_elf_util.c @@ -98,7 +98,8 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols, symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name); reloc->offset = rel.r_offset; - reloc->name = strdup(symbol_name); + strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1); + reloc->name[sizeof(reloc->name)-1] = 0; } } @@ -194,26 +195,3 @@ const unsigned char *radeon_shader_binary_config_start( } return binary->config; } - -void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs, - unsigned reloc_count) -{ - unsigned i; - for (i = 0; i < reloc_count; i++) { - FREE(relocs[i].name); - } - FREE(relocs); -} - -void radeon_shader_binary_free_members(struct radeon_shader_binary *binary, - unsigned free_relocs) -{ - FREE(binary->code); - FREE(binary->config); - FREE(binary->rodata); - - if (free_relocs) { - radeon_shader_binary_free_relocs(binary->relocs, - binary->reloc_count); - } -} diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h index ea4ab2f14b2..c2af9e0dfe0 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.h +++ b/src/gallium/drivers/radeon/radeon_elf_util.h @@ -47,18 +47,4 @@ const unsigned char *radeon_shader_binary_config_start( const struct radeon_shader_binary *binary, uint64_t symbol_offset); -/** - * Free all memory allocated for members of \p binary. This function does - * not free \p binary. - * - * @param free_relocs If false, reolc information will not be freed. - */ -void radeon_shader_binary_free_members(struct radeon_shader_binary *binary, - unsigned free_relocs); - -/** - * Free \p relocs and all member data. 
- */ -void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs, - unsigned reloc_count); #endif /* RADEON_ELF_UTIL_H */ diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index e967ad2214e..bdee2f8020a 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -113,6 +113,7 @@ struct radeon_llvm_context { struct tgsi_declaration_range *arrays; LLVMValueRef main_fn; + LLVMTypeRef return_type; struct gallivm_state gallivm; }; @@ -158,10 +159,12 @@ void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_bas LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); -void radeon_llvm_context_init(struct radeon_llvm_context * ctx); +void radeon_llvm_context_init(struct radeon_llvm_context * ctx, + const char *triple); void radeon_llvm_create_func(struct radeon_llvm_context * ctx, - LLVMTypeRef *ParamTypes, unsigned ParamCount); + LLVMTypeRef *return_types, unsigned num_return_elems, + LLVMTypeRef *ParamTypes, unsigned ParamCount); void radeon_llvm_dispose(struct radeon_llvm_context * ctx); diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 0dfd9ad4867..da19533b862 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -55,8 +55,10 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, static void radeon_llvm_optimize(LLVMModuleRef mod) { +#if HAVE_LLVM < 0x0309 const char *data_layout = LLVMGetDataLayout(mod); LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout); +#endif LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate(); LLVMPassManagerRef pass_manager = LLVMCreatePassManager(); @@ -77,14 +79,18 @@ static void radeon_llvm_optimize(LLVMModuleRef mod) } } +#if HAVE_LLVM < 0x0309 LLVMAddTargetData(TD, pass_manager); +#endif LLVMAddAlwaysInlinerPass(pass_manager); 
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager); LLVMRunPassManager(pass_manager, mod); LLVMPassManagerBuilderDispose(builder); LLVMDisposePassManager(pass_manager); +#if HAVE_LLVM < 0x0309 LLVMDisposeTargetData(TD); +#endif } LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index f5e3f6af1a0..c74397fb5c9 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -30,6 +30,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_misc.h" #include "gallivm/lp_bld_swizzle.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -1520,7 +1521,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action, } } -void radeon_llvm_context_init(struct radeon_llvm_context * ctx) +void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple) { struct lp_type type; @@ -1534,6 +1535,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) ctx->gallivm.context = LLVMContextCreate(); ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", ctx->gallivm.context); + LLVMSetTarget(ctx->gallivm.module, + +#if HAVE_LLVM < 0x0306 + "r600--"); +#else + triple); +#endif ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context); struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; @@ -1693,14 +1701,22 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) } void radeon_llvm_create_func(struct radeon_llvm_context * ctx, + LLVMTypeRef *return_types, unsigned num_return_elems, LLVMTypeRef *ParamTypes, unsigned ParamCount) { - LLVMTypeRef main_fn_type; + LLVMTypeRef main_fn_type, ret_type; LLVMBasicBlockRef main_fn_body; + if (num_return_elems) + ret_type = 
LLVMStructTypeInContext(ctx->gallivm.context, + return_types, + num_return_elems, true); + else + ret_type = LLVMVoidTypeInContext(ctx->gallivm.context); + /* Setup the function */ - main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context), - ParamTypes, ParamCount, 0); + ctx->return_type = ret_type; + main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0); ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type); main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context, ctx->main_fn, "main_body"); @@ -1710,11 +1726,16 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx, void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) { struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm; + const char *triple = LLVMGetTarget(gallivm->module); + LLVMTargetLibraryInfoRef target_library_info; /* Create the pass manager */ gallivm->passmgr = LLVMCreateFunctionPassManagerForModule( gallivm->module); + target_library_info = gallivm_create_target_library_info(triple); + LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr); + /* This pass should eliminate all the load and store instructions */ LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); @@ -1730,7 +1751,7 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) LLVMDisposeBuilder(gallivm->builder); LLVMDisposePassManager(gallivm->passmgr); - + gallivm_dispose_target_library_info(target_library_info); } void radeon_llvm_dispose(struct radeon_llvm_context * ctx) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 7370a113d3d..9f5f4c682bc 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -196,9 +196,7 @@ static unsigned compute_num_waves_for_scratch( } static void si_launch_grid( - struct pipe_context *ctx, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) + struct 
pipe_context *ctx, const struct pipe_grid_info *info) { struct si_context *sctx = (struct si_context*)ctx; struct radeon_winsys_cs *cs = sctx->b.gfx.cs; @@ -232,7 +230,7 @@ static void si_launch_grid( pm4->compute_pkt = true; /* Read the config information */ - si_shader_binary_read_config(&shader->binary, &shader->config, pc); + si_shader_binary_read_config(&shader->binary, &shader->config, info->pc); /* Upload the kernel arguments */ @@ -242,15 +240,16 @@ static void si_launch_grid( kernel_args = sctx->b.ws->buffer_map(input_buffer->buf, sctx->b.gfx.cs, PIPE_TRANSFER_WRITE); for (i = 0; i < 3; i++) { - kernel_args[i] = grid_layout[i]; - kernel_args[i + 3] = grid_layout[i] * block_layout[i]; - kernel_args[i + 6] = block_layout[i]; + kernel_args[i] = info->grid[i]; + kernel_args[i + 3] = info->grid[i] * info->block[i]; + kernel_args[i + 6] = info->block[i]; } num_waves_for_scratch = compute_num_waves_for_scratch( - &sctx->screen->b.info, block_layout, grid_layout); + &sctx->screen->b.info, info->block, info->grid); - memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size); + memcpy(kernel_args + (num_work_size_bytes / 4), info->input, + program->input_size); if (shader->config.scratch_bytes_per_wave > 0) { @@ -291,11 +290,11 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0); si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X, - S_00B81C_NUM_THREAD_FULL(block_layout[0])); + S_00B81C_NUM_THREAD_FULL(info->block[0])); si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y, - S_00B820_NUM_THREAD_FULL(block_layout[1])); + S_00B820_NUM_THREAD_FULL(info->block[1])); si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z, - S_00B824_NUM_THREAD_FULL(block_layout[2])); + S_00B824_NUM_THREAD_FULL(info->block[2])); /* Global buffers */ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { @@ -323,7 +322,7 @@ static void si_launch_grid( } shader_va = shader->bo->gpu_address; - shader_va += pc; + shader_va += info->pc; 
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); @@ -375,9 +374,9 @@ static void si_launch_grid( ; si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); - si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */ - si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */ - si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */ + si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */ + si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */ + si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */ si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */ si_pm4_cmd_end(pm4, false); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e9d69d2db38..37fd4a25d59 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -22,6 +22,7 @@ */ #include "si_pipe.h" +#include "si_shader.h" #include "si_public.h" #include "sid.h" @@ -448,6 +449,10 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NATIVE; + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_DOUBLES: return HAVE_LLVM >= 0x0307; @@ -511,6 +516,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: return HAVE_LLVM >= 0x0307; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: @@ -522,6 +529,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; } return 0; @@ -530,6 +538,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu static void 
si_destroy_screen(struct pipe_screen* pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; + struct si_shader_part *parts[] = { + sscreen->vs_prologs, + sscreen->vs_epilogs, + sscreen->tcs_epilogs, + sscreen->ps_prologs, + sscreen->ps_epilogs + }; + unsigned i; if (!sscreen) return; @@ -537,6 +553,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + /* Free shader parts. */ + for (i = 0; i < ARRAY_SIZE(parts); i++) { + while (parts[i]) { + struct si_shader_part *part = parts[i]; + + parts[i] = part->next; + radeon_shader_binary_clean(&part->binary); + FREE(part); + } + } + pipe_mutex_destroy(sscreen->shader_parts_mutex); + si_destroy_shader_cache(sscreen); r600_destroy_common_screen(&sscreen->b); } @@ -584,7 +612,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_init_gs_info(sscreen)) { + !si_init_gs_info(sscreen) || + !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } @@ -594,6 +623,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + pipe_mutex_init(sscreen->shader_parts_mutex); + sscreen->use_monolithic_shaders = + HAVE_LLVM < 0x0308 || + (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b5790d6b564..ef860a58b83 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -80,10 +80,36 @@ #define SI_MAX_BORDER_COLORS 4096 struct si_compute; +struct hash_table; struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; + + /* Whether shaders are monolithic (1-part) or 
separate (3-part). */ + bool use_monolithic_shaders; + + pipe_mutex shader_parts_mutex; + struct si_shader_part *vs_prologs; + struct si_shader_part *vs_epilogs; + struct si_shader_part *tcs_epilogs; + struct si_shader_part *ps_prologs; + struct si_shader_part *ps_epilogs; + + /* Shader cache in memory. + * + * Design & limitations: + * - The shader cache is per screen (= per process), never saved to + * disk, and skips redundant shader compilations from TGSI to bytecode. + * - It can only be used with one-variant-per-shader support, in which + * case only the main (typically middle) part of shaders is cached. + * - Only VS, TCS, TES, PS are cached, out of which only the hw VS + * variants of VS and TES are cached, so LS and ES aren't. + * - GS and CS aren't cached, but it's certainly possible to cache + * those as well. + */ + pipe_mutex shader_cache_mutex; + struct hash_table *shader_cache; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index baa1090e2fb..57458ae1381 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -70,6 +70,12 @@ struct si_shader_context unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ bool is_gs_copy_shader; + + /* Whether to generate the optimized shader variant compiled as a whole + * (without a prolog and epilog) + */ + bool is_monolithic; + int param_streamout_config; int param_streamout_write_index; int param_streamout_offset[4]; @@ -77,6 +83,7 @@ struct si_shader_context int param_rel_auto_id; int param_vs_prim_id; int param_instance_id; + int param_vertex_index0; int param_tes_u; int param_tes_v; int param_tes_rel_patch_id; @@ -96,14 +103,17 @@ struct si_shader_context LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef gs_next_vertex[4]; + LLVMValueRef return_value; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i32; + LLVMTypeRef i64; LLVMTypeRef i128; LLVMTypeRef f32; LLVMTypeRef v16i8; + LLVMTypeRef v2i32; LLVMTypeRef v4i32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; @@ -118,9 +128,17 @@ static struct si_shader_context *si_shader_context( static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, - LLVMTargetMachineRef tm, - struct tgsi_shader_info *info); + LLVMTargetMachineRef tm); +/* Ideally pass the sample mask input to the PS epilog as v13, which + * is its usual location, so that the shader doesn't have to add v_mov. + */ +#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13 + +/* The VS location of the PrimitiveID input is the same in the epilog, + * so that the main shader part doesn't have to move it. 
+ */ +#define VS_EPILOG_PRIMID_LOC 2 #define PERSPECTIVE_BASE 0 #define LINEAR_BASE 9 @@ -196,6 +214,10 @@ static LLVMValueRef unpack_param(struct si_shader_context *ctx, LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn, param); + if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind) + value = bitcast(&ctx->radeon_bld.soa.bld_base, + TGSI_TYPE_UNSIGNED, value); + if (rshift) value = LLVMBuildLShr(gallivm->builder, value, lp_build_const_int32(gallivm, rshift), ""); @@ -375,7 +397,7 @@ static LLVMValueRef build_indexed_load_const( static LLVMValueRef get_instance_index_for_fetch( struct radeon_llvm_context *radeon_bld, - unsigned divisor) + unsigned param_start_instance, unsigned divisor) { struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); @@ -389,8 +411,8 @@ static LLVMValueRef get_instance_index_for_fetch( result = LLVMBuildUDiv(gallivm->builder, result, lp_build_const_int32(gallivm, divisor), ""); - return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( - radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + return LLVMBuildAdd(gallivm->builder, result, + LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } static void declare_input_vs( @@ -402,7 +424,8 @@ static void declare_input_vs( struct gallivm_state *gallivm = base->gallivm; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); - unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index]; + unsigned divisor = + ctx->shader->key.vs.prolog.instance_divisors[input_index]; unsigned chan; @@ -424,10 +447,16 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - if (divisor) { + if (!ctx->is_monolithic) { + buffer_index = LLVMGetParam(radeon_bld->main_fn, + ctx->param_vertex_index0 + + input_index); + } else if (divisor) { /* Build index from instance ID, start instance and divisor */ - ctx->shader->uses_instanceid = true; - buffer_index = 
get_instance_index_for_fetch(&ctx->radeon_bld, divisor); + ctx->shader->info.uses_instanceid = true; + buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, + SI_PARAM_START_INSTANCE, + divisor); } else { /* Load the buffer index for vertices. */ LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn, @@ -853,7 +882,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) static unsigned select_interp_param(struct si_shader_context *ctx, unsigned param) { - if (!ctx->shader->key.ps.force_persample_interp) + if (!ctx->shader->key.ps.prolog.force_persample_interp || + !ctx->is_monolithic) return param; /* If the shader doesn't use center/centroid, just return the parameter. @@ -923,7 +953,7 @@ static void interp_fs_input(struct si_shader_context *ctx, intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; if (semantic_name == TGSI_SEMANTIC_COLOR && - ctx->shader->key.ps.color_two_side) { + ctx->shader->key.ps.prolog.color_two_side) { LLVMValueRef args[4]; LLVMValueRef is_face_positive; LLVMValueRef back_attr_number; @@ -997,6 +1027,7 @@ static void declare_input_fs( unsigned input_index, const struct tgsi_full_declaration *decl) { + struct lp_build_context *base = &radeon_bld->soa.bld_base.base; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); struct si_shader *shader = ctx->shader; @@ -1004,6 +1035,26 @@ static void declare_input_fs( LLVMValueRef interp_param = NULL; int interp_param_idx; + /* Get colors from input VGPRs (set by the prolog). */ + if (!ctx->is_monolithic && + decl->Semantic.Name == TGSI_SEMANTIC_COLOR) { + unsigned i = decl->Semantic.Index; + unsigned colors_read = shader->selector->info.colors_read; + unsigned mask = colors_read >> (i * 4); + unsigned offset = SI_PARAM_POS_FIXED_PT + 1 + + (i ? util_bitcount(colors_read & 0xf) : 0); + + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] = + mask & 0x1 ? 
LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] = + mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] = + mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] = + mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef; + return; + } + interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, decl->Interp.Location); if (interp_param_idx == -1) @@ -1330,12 +1381,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { const union si_shader_key *key = &ctx->shader->key; - unsigned col_formats = key->ps.spi_shader_col_format; + unsigned col_formats = key->ps.epilog.spi_shader_col_format; int cbuf = target - V_008DFC_SQ_EXP_MRT; assert(cbuf >= 0 && cbuf < 8); spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; - is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1; + is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1; } args[4] = uint->zero; /* COMPR flag */ @@ -1488,13 +1539,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { + if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) { LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_ALPHA_REF); LLVMValueRef alpha_pass = lp_build_cmp(&bld_base->base, - ctx->shader->key.ps.alpha_func, + ctx->shader->key.ps.epilog.alpha_func, alpha, alpha_ref); LLVMValueRef arg = lp_build_select(&bld_base->base, @@ -1511,7 +1562,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha) + 
LLVMValueRef alpha, + unsigned samplemask_param) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1519,7 +1571,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context * /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ coverage = LLVMGetParam(ctx->radeon_bld.main_fn, - SI_PARAM_SAMPLE_COVERAGE); + samplemask_param); coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage); coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32", @@ -1841,7 +1893,8 @@ handle_semantic: case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_BCOLOR: target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; case TGSI_SEMANTIC_CLIPDIST: @@ -1855,7 +1908,8 @@ handle_semantic: case TGSI_SEMANTIC_TEXCOORD: case TGSI_SEMANTIC_GENERIC: target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; default: @@ -1883,7 +1937,7 @@ handle_semantic: } } - shader->nr_param_exports = param_count; + shader->info.nr_param_exports = param_count; /* We need to add the position output manually if it's missing. 
*/ if (!pos_args[0][0]) { @@ -1945,7 +1999,7 @@ handle_semantic: for (i = 0; i < 4; i++) if (pos_args[i][0]) - shader->nr_pos_exports++; + shader->info.nr_pos_exports++; pos_idx = 0; for (i = 0; i < 4; i++) { @@ -1955,7 +2009,7 @@ handle_semantic: /* Specify the target we are exporting */ pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); - if (pos_idx == shader->nr_pos_exports) + if (pos_idx == shader->info.nr_pos_exports) /* Specify that this is the last export */ pos_args[i][2] = uint->one; @@ -1989,7 +2043,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, invocation_id, bld_base->uint_bld.zero, "")); /* Determine the layout of one tess factor element in the buffer. */ - switch (shader->key.tcs.prim_mode) { + switch (shader->key.tcs.epilog.prim_mode) { case PIPE_PRIM_LINES: stride = 2; /* 2 dwords, 1 vec2 store */ outer_comps = 2; @@ -2061,14 +2115,51 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef invocation_id; + LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; + rel_patch_id = get_rel_patch_id(ctx); invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5); + tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); - si_write_tess_factors(bld_base, - get_rel_patch_id(ctx), - invocation_id, - get_tcs_out_current_patch_data_offset(ctx)); + if (!ctx->is_monolithic) { + /* Return epilog parameters from this function. 
*/ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef ret = ctx->return_value; + LLVMValueRef rw_buffers, rw0, rw1, tf_soffset; + unsigned vgpr; + + /* RW_BUFFERS pointer */ + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); + rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, ""); + rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, ""); + rw0 = LLVMBuildExtractElement(builder, rw_buffers, + bld_base->uint_bld.zero, ""); + rw1 = LLVMBuildExtractElement(builder, rw_buffers, + bld_base->uint_bld.one, ""); + ret = LLVMBuildInsertValue(builder, ret, rw0, 0, ""); + ret = LLVMBuildInsertValue(builder, ret, rw1, 1, ""); + + /* Tess factor buffer soffset is after user SGPRs. */ + tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_TESS_FACTOR_OFFSET); + ret = LLVMBuildInsertValue(builder, ret, tf_soffset, + SI_TCS_NUM_USER_SGPR, ""); + + /* VGPRs */ + rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id); + invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id); + tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset); + + vgpr = SI_TCS_NUM_USER_SGPR + 1; + ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); + ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); + ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); + ctx->return_value = ret; + return; + } + + si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset); } static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) @@ -2214,16 +2305,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) ""); } - /* Export PrimitiveID when PS needs it. 
*/ - if (si_vs_exports_prim_id(ctx->shader)) { - outputs[i].name = TGSI_SEMANTIC_PRIMID; - outputs[i].sid = 0; - outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, - get_primitive_id(bld_base, 0)); - outputs[i].values[1] = bld_base->base.undef; - outputs[i].values[2] = bld_base->base.undef; - outputs[i].values[3] = bld_base->base.undef; - i++; + if (ctx->is_monolithic) { + /* Export PrimitiveID when PS needs it. */ + if (si_vs_exports_prim_id(ctx->shader)) { + outputs[i].name = TGSI_SEMANTIC_PRIMID; + outputs[i].sid = 0; + outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, + get_primitive_id(bld_base, 0)); + outputs[i].values[1] = bld_base->base.undef; + outputs[i].values[2] = bld_base->base.undef; + outputs[i].values[3] = bld_base->base.undef; + i++; + } + } else { + /* Return the primitive ID from the LLVM function. */ + ctx->return_value = + LLVMBuildInsertValue(gallivm->builder, + ctx->return_value, + bitcast(bld_base, TGSI_TYPE_FLOAT, + get_primitive_id(bld_base, 0)), + VS_EPILOG_PRIMID_LOC, ""); } si_llvm_export_vs(bld_base, outputs, i); @@ -2284,6 +2385,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, LLVMValueRef *color, unsigned index, + unsigned samplemask_param, bool is_last) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -2291,30 +2393,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, int i; /* Clamp color */ - if (ctx->shader->key.ps.clamp_color) + if (ctx->shader->key.ps.epilog.clamp_color) for (i = 0; i < 4; i++) color[i] = radeon_llvm_saturate(bld_base, color[i]); /* Alpha to one */ - if (ctx->shader->key.ps.alpha_to_one) + if (ctx->shader->key.ps.epilog.alpha_to_one) color[3] = base->one; /* Alpha test */ if (index == 0 && - ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(bld_base, color[3]); /* Line & polygon 
smoothing */ - if (ctx->shader->key.ps.poly_line_smoothing) - color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); + if (ctx->shader->key.ps.epilog.poly_line_smoothing) + color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3], + samplemask_param); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (ctx->shader->key.ps.last_cbuf > 0) { + if (ctx->shader->key.ps.epilog.last_cbuf > 0) { LLVMValueRef args[8][9]; int c, last = -1; /* Get the export arguments, also find out what the last one is. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + c, args[c]); if (args[c][0] != bld_base->uint_bld.zero) @@ -2322,7 +2425,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } /* Emit all exports. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { if (is_last && last == c) { args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ args[c][2] = bld_base->uint_bld.one; /* DONE bit */ @@ -2385,11 +2488,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) * Otherwise, find the last color export. */ if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) { - unsigned spi_format = shader->key.ps.spi_shader_col_format; + unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format; /* Don't export NULL and return if alpha-test is enabled. 
*/ - if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS && - shader->key.ps.alpha_func != PIPE_FUNC_NEVER && + if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS && + shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER && (spi_format & 0xf) == 0) spi_format |= V_028714_SPI_SHADER_32_AR; @@ -2400,10 +2503,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) continue; /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (shader->key.ps.last_cbuf > 0) { + if (shader->key.ps.epilog.last_cbuf > 0) { /* Just set this if any of the colorbuffers are enabled. */ if (spi_format & - ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1)) + ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1)) last_color_export = i; continue; } @@ -2445,6 +2548,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) ctx->radeon_bld.soa.outputs[i][j], ""); si_export_mrt_color(bld_base, color, semantic_index, + SI_PARAM_SAMPLE_COVERAGE, last_color_export == i); break; default: @@ -2458,6 +2562,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) si_export_mrt_z(bld_base, depth, stencil, samplemask); } +/** + * Return PS outputs in this order: + * + * v[0:3] = color0.xyzw + * v[4:7] = color1.xyzw + * ... + * vN+0 = Depth + * vN+1 = Stencil + * vN+2 = SampleMask + * vN+3 = SampleMaskIn (used for OpenGL smoothing) + * + * The alpha-ref SGPR is returned via its original location. 
+ */ +static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct si_shader *shader = ctx->shader; + struct lp_build_context *base = &bld_base->base; + struct tgsi_shader_info *info = &shader->selector->info; + LLVMBuilderRef builder = base->gallivm->builder; + unsigned i, j, first_vgpr, vgpr; + + LLVMValueRef color[8][4] = {}; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + LLVMValueRef ret; + + /* Read the output values. */ + for (i = 0; i < info->num_outputs; i++) { + unsigned semantic_name = info->output_semantic_name[i]; + unsigned semantic_index = info->output_semantic_index[i]; + + switch (semantic_name) { + case TGSI_SEMANTIC_COLOR: + assert(semantic_index < 8); + for (j = 0; j < 4; j++) { + LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j]; + LLVMValueRef result = LLVMBuildLoad(builder, ptr, ""); + color[semantic_index][j] = result; + } + break; + case TGSI_SEMANTIC_POSITION: + depth = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][2], ""); + break; + case TGSI_SEMANTIC_STENCIL: + stencil = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][1], ""); + break; + case TGSI_SEMANTIC_SAMPLEMASK: + samplemask = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][0], ""); + break; + default: + fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", + semantic_name); + } + } + + /* Fill the return structure. */ + ret = ctx->return_value; + + /* Set SGPRs. 
*/ + ret = LLVMBuildInsertValue(builder, ret, + bitcast(bld_base, TGSI_TYPE_SIGNED, + LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_ALPHA_REF)), + SI_SGPR_ALPHA_REF, ""); + + /* Set VGPRs */ + first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1; + for (i = 0; i < ARRAY_SIZE(color); i++) { + if (!color[i][0]) + continue; + + for (j = 0; j < 4; j++) + ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, ""); + } + if (depth) + ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, ""); + if (stencil) + ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, ""); + if (samplemask) + ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, ""); + + /* Add the input sample mask for smoothing at the end. */ + if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC) + vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC; + ret = LLVMBuildInsertValue(builder, ret, + LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLE_COVERAGE), vgpr++, ""); + + ctx->return_value = ret; +} + static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); @@ -2536,13 +2734,12 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) /** * Load an image view, fmask view. or sampler state descriptor. */ -static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, - LLVMValueRef index, enum desc_type type) +static LLVMValueRef get_sampler_desc_custom(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum desc_type type) { struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, - SI_PARAM_SAMPLERS); switch (type) { case DESC_IMAGE: @@ -2558,12 +2755,21 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, /* The sampler state is at [12:15]. 
*/ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); - ptr = LLVMBuildPointerCast(builder, ptr, - const_array(ctx->v4i32, 0), ""); + list = LLVMBuildPointerCast(builder, list, + const_array(ctx->v4i32, 0), ""); break; } - return build_indexed_load_const(ctx, ptr, index); + return build_indexed_load_const(ctx, list, index); +} + +static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, + LLVMValueRef index, enum desc_type type) +{ + LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + + return get_sampler_desc_custom(ctx, list, index, type); } static void tex_fetch_ptrs( @@ -3546,6 +3752,30 @@ static const struct lp_build_tgsi_action interp_action = { .emit = build_interp_intrinsic, }; +static void si_create_function(struct si_shader_context *ctx, + LLVMTypeRef *returns, unsigned num_returns, + LLVMTypeRef *params, unsigned num_params, + int last_array_pointer, int last_sgpr) +{ + int i; + + radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns, + params, num_params); + radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); + ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type); + + for (i = 0; i <= last_sgpr; ++i) { + LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); + + /* We tell llvm that array inputs are passed by value to allow Sinking pass + * to move load. Inputs are constant so this is fine. 
*/ + if (i <= last_array_pointer) + LLVMAddAttribute(P, LLVMByValAttribute); + else + LLVMAddAttribute(P, LLVMInRegAttribute); + } +} + static void create_meta_data(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; @@ -3579,15 +3809,57 @@ static void declare_streamout_params(struct si_shader_context *ctx, } } +static unsigned llvm_get_type_size(LLVMTypeRef type) +{ + LLVMTypeKind kind = LLVMGetTypeKind(type); + + switch (kind) { + case LLVMIntegerTypeKind: + return LLVMGetIntTypeWidth(type) / 8; + case LLVMFloatTypeKind: + return 4; + case LLVMPointerTypeKind: + return 8; + case LLVMVectorTypeKind: + return LLVMGetVectorSize(type) * + llvm_get_type_size(LLVMGetElementType(type)); + default: + assert(0); + return 0; + } +} + +static void declare_tess_lds(struct si_shader_context *ctx) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type; + + /* This is the upper bound, maximum is 32 inputs times 32 vertices */ + unsigned vertex_data_dw_size = 32*32*4; + unsigned patch_data_dw_size = 32*4; + /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ + unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; + unsigned lds_dwords = patch_dw_size; + + /* The actual size is computed outside of the shader to reduce + * the number of shader variants. 
*/ + ctx->lds = + LLVMAddGlobalInAddressSpace(gallivm->module, + LLVMArrayType(i32, lds_dwords), + "tess_lds", + LOCAL_ADDR_SPACE); +} + static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = ctx->shader; - LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32; - unsigned i, last_array_pointer, last_sgpr, num_params; + LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32; + LLVMTypeRef returns[16+32*4]; + unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs; + unsigned num_returns = 0; - v2i32 = LLVMVectorType(ctx->i32, 2); v3i32 = LLVMVectorType(ctx->i32, 3); params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); @@ -3630,6 +3902,20 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_rel_auto_id = num_params++] = ctx->i32; params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; + + if (!ctx->is_monolithic && + !ctx->is_gs_copy_shader) { + /* Vertex load indices. */ + ctx->param_vertex_index0 = num_params; + + for (i = 0; i < shader->selector->info.num_inputs; i++) + params[num_params++] = ctx->i32; + + /* PrimitiveID output. */ + if (!shader->key.vs.as_es && !shader->key.vs.as_ls) + for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) + returns[num_returns++] = ctx->f32; + } break; case TGSI_PROCESSOR_TESS_CTRL: @@ -3643,6 +3929,15 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_PATCH_ID] = ctx->i32; params[SI_PARAM_REL_IDS] = ctx->i32; num_params = SI_PARAM_REL_IDS+1; + + if (!ctx->is_monolithic) { + /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. 
*/ + for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++) + returns[num_returns++] = ctx->i32; /* SGPRs */ + + for (i = 0; i < 3; i++) + returns[num_returns++] = ctx->f32; /* VGPRs */ + } break; case TGSI_PROCESSOR_TESS_EVAL: @@ -3663,6 +3958,11 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_tes_v = num_params++] = ctx->f32; params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32; params[ctx->param_tes_patch_id = num_params++] = ctx->i32; + + /* PrimitiveID output. */ + if (!ctx->is_monolithic && !shader->key.tes.as_es) + for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) + returns[num_returns++] = ctx->f32; break; case TGSI_PROCESSOR_GEOMETRY: @@ -3686,13 +3986,13 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_ALPHA_REF] = ctx->f32; params[SI_PARAM_PRIM_MASK] = ctx->i32; last_sgpr = SI_PARAM_PRIM_MASK; - params[SI_PARAM_PERSP_SAMPLE] = v2i32; - params[SI_PARAM_PERSP_CENTER] = v2i32; - params[SI_PARAM_PERSP_CENTROID] = v2i32; + params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32; + params[SI_PARAM_PERSP_CENTER] = ctx->v2i32; + params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32; params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; - params[SI_PARAM_LINEAR_SAMPLE] = v2i32; - params[SI_PARAM_LINEAR_CENTER] = v2i32; - params[SI_PARAM_LINEAR_CENTROID] = v2i32; + params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32; + params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32; + params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32; params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32; params[SI_PARAM_POS_X_FLOAT] = ctx->f32; params[SI_PARAM_POS_Y_FLOAT] = ctx->f32; @@ -3701,8 +4001,39 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_FRONT_FACE] = ctx->i32; params[SI_PARAM_ANCILLARY] = ctx->i32; params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32; - params[SI_PARAM_POS_FIXED_PT] = ctx->f32; + params[SI_PARAM_POS_FIXED_PT] = ctx->i32; num_params = SI_PARAM_POS_FIXED_PT+1; + + if (!ctx->is_monolithic) { + /* Color inputs from the prolog. 
*/ + if (shader->selector->info.colors_read) { + unsigned num_color_elements = + util_bitcount(shader->selector->info.colors_read); + + assert(num_params + num_color_elements <= ARRAY_SIZE(params)); + for (i = 0; i < num_color_elements; i++) + params[num_params++] = ctx->f32; + } + + /* Outputs for the epilog. */ + num_return_sgprs = SI_SGPR_ALPHA_REF + 1; + num_returns = + num_return_sgprs + + util_bitcount(shader->selector->info.colors_written) * 4 + + shader->selector->info.writes_z + + shader->selector->info.writes_stencil + + shader->selector->info.writes_samplemask + + 1 /* SampleMaskIn */; + + num_returns = MAX2(num_returns, + num_return_sgprs + + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); + + for (i = 0; i < num_return_sgprs; i++) + returns[i] = ctx->i32; + for (; i < num_returns; i++) + returns[i] = ctx->f32; + } break; default: @@ -3711,20 +4042,38 @@ static void create_function(struct si_shader_context *ctx) } assert(num_params <= Elements(params)); - radeon_llvm_create_func(&ctx->radeon_bld, params, num_params); - radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); - - for (i = 0; i <= last_sgpr; ++i) { - LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); - /* We tell llvm that array inputs are passed by value to allow Sinking pass - * to move load. Inputs are constant so this is fine. */ - if (i <= last_array_pointer) - LLVMAddAttribute(P, LLVMByValAttribute); - else - LLVMAddAttribute(P, LLVMInRegAttribute); + si_create_function(ctx, returns, num_returns, params, + num_params, last_array_pointer, last_sgpr); + + /* Reserve register locations for VGPR inputs the PS prolog may need. 
*/ + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && + !ctx->is_monolithic) { + radeon_llvm_add_attribute(ctx->radeon_bld.main_fn, + "InitialPSInputAddr", + S_0286D0_PERSP_SAMPLE_ENA(1) | + S_0286D0_PERSP_CENTER_ENA(1) | + S_0286D0_PERSP_CENTROID_ENA(1) | + S_0286D0_LINEAR_SAMPLE_ENA(1) | + S_0286D0_LINEAR_CENTER_ENA(1) | + S_0286D0_LINEAR_CENTROID_ENA(1) | + S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_POS_FIXED_PT_ENA(1)); } + shader->info.num_input_sgprs = 0; + shader->info.num_input_vgprs = 0; + + for (i = 0; i <= last_sgpr; ++i) + shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4; + + /* Unused fragment shader inputs are eliminated by the compiler, + * so we don't know yet how many there will be. + */ + if (ctx->type != TGSI_PROCESSOR_FRAGMENT) + for (; i < num_params; ++i) + shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; + if (bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || @@ -3740,22 +4089,8 @@ static void create_function(struct si_shader_context *ctx) if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) || ctx->type == TGSI_PROCESSOR_TESS_CTRL || - ctx->type == TGSI_PROCESSOR_TESS_EVAL) { - /* This is the upper bound, maximum is 32 inputs times 32 vertices */ - unsigned vertex_data_dw_size = 32*32*4; - unsigned patch_data_dw_size = 32*4; - /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ - unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; - unsigned lds_dwords = patch_dw_size; - - /* The actual size is computed outside of the shader to reduce - * the number of shader variants. 
*/ - ctx->lds = - LLVMAddGlobalInAddressSpace(gallivm->module, - LLVMArrayType(ctx->i32, lds_dwords), - "tess_lds", - LOCAL_ADDR_SPACE); - } + ctx->type == TGSI_PROCESSOR_TESS_EVAL) + declare_tess_lds(ctx); } static void preload_constants(struct si_shader_context *ctx) @@ -3887,6 +4222,49 @@ static void preload_ring_buffers(struct si_shader_context *ctx) } } +static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, + LLVMValueRef param_sampler_views, + unsigned param_pos_fixed_pt) +{ + struct lp_build_tgsi_context *bld_base = + &ctx->radeon_bld.soa.bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_emit_data result = {}; + struct tgsi_full_instruction inst = {}; + LLVMValueRef desc, sampler_index, address[2], pix; + + /* Use the fixed-point gl_FragCoord input. + * Since the stipple pattern is 32x32 and it repeats, just get 5 bits + * per coordinate to get the repeating effect. + */ + address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5); + address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5); + + /* Load the sampler view descriptor. */ + sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER); + desc = get_sampler_desc_custom(ctx, param_sampler_views, + sampler_index, DESC_IMAGE); + + /* Load the texel. */ + inst.Instruction.Opcode = TGSI_OPCODE_TXF; + inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */ + result.inst = &inst; + set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF, + inst.Texture.Texture, + desc, NULL, address, ARRAY_SIZE(address), 0xf); + build_tex_intrinsic(&tex_action, bld_base, &result); + + /* Kill the thread accordingly. 
*/ + pix = LLVMBuildExtractElement(gallivm->builder, result.output[0], + lp_build_const_int32(gallivm, 3), ""); + pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix); + pix = LLVMBuildFNeg(gallivm->builder, pix, ""); + + lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", + LLVMVoidTypeInContext(gallivm->context), + &pix, 1, 0); +} + void si_shader_binary_read_config(struct radeon_shader_binary *binary, struct si_shader_config *conf, unsigned symbol_offset) @@ -3972,41 +4350,70 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx, } } +static unsigned si_get_shader_binary_size(struct si_shader *shader) +{ + unsigned size = shader->binary.code_size; + + if (shader->prolog) + size += shader->prolog->binary.code_size; + if (shader->epilog) + size += shader->epilog->binary.code_size; + return size; +} + int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) { - const struct radeon_shader_binary *binary = &shader->binary; - unsigned code_size = binary->code_size + binary->rodata_size; + const struct radeon_shader_binary *prolog = + shader->prolog ? &shader->prolog->binary : NULL; + const struct radeon_shader_binary *epilog = + shader->epilog ? &shader->epilog->binary : NULL; + const struct radeon_shader_binary *mainb = &shader->binary; + unsigned bo_size = si_get_shader_binary_size(shader) + + (!epilog ? mainb->rodata_size : 0); unsigned char *ptr; + assert(!prolog || !prolog->rodata_size); + assert((!prolog && !epilog) || !mainb->rodata_size); + assert(!epilog || !epilog->rodata_size); + r600_resource_reference(&shader->bo, NULL); shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE, - code_size); + bo_size); if (!shader->bo) return -ENOMEM; + /* Upload. 
*/ ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE); - util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size); - if (binary->rodata_size > 0) { - ptr += binary->code_size; - util_memcpy_cpu_to_le32(ptr, binary->rodata, - binary->rodata_size); + + if (prolog) { + util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size); + ptr += prolog->code_size; } + util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size); + ptr += mainb->code_size; + + if (epilog) + util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size); + else if (mainb->rodata_size > 0) + util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size); + sscreen->b.ws->buffer_unmap(shader->bo->buf); return 0; } static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary, - struct pipe_debug_callback *debug) + struct pipe_debug_callback *debug, + const char *name) { char *line, *p; unsigned i, count; if (binary->disasm_string) { - fprintf(stderr, "\nShader Disassembly:\n\n"); - fprintf(stderr, "%s\n", binary->disasm_string); + fprintf(stderr, "Shader %s disassembly:\n", name); + fprintf(stderr, "%s", binary->disasm_string); if (debug && debug->debug_message) { /* Very long debug messages are cut off, so send the @@ -4036,7 +4443,7 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary "Shader Disassembly End"); } } else { - fprintf(stderr, "SI CODE:\n"); + fprintf(stderr, "Shader %s binary:\n", name); for (i = 0; i < binary->code_size; i += 4) { fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], binary->code[i + 2], @@ -4115,16 +4522,60 @@ static void si_shader_dump_stats(struct si_screen *sscreen, max_simd_waves); } +static const char *si_get_shader_name(struct si_shader *shader, + unsigned processor) +{ + switch (processor) { + case TGSI_PROCESSOR_VERTEX: + if (shader->key.vs.as_es) + return "Vertex Shader as ES"; + else if (shader->key.vs.as_ls) + return "Vertex Shader as LS"; + else + 
return "Vertex Shader as VS"; + case TGSI_PROCESSOR_TESS_CTRL: + return "Tessellation Control Shader"; + case TGSI_PROCESSOR_TESS_EVAL: + if (shader->key.tes.as_es) + return "Tessellation Evaluation Shader as ES"; + else + return "Tessellation Evaluation Shader as VS"; + case TGSI_PROCESSOR_GEOMETRY: + if (shader->gs_copy_shader == NULL) + return "GS Copy Shader as VS"; + else + return "Geometry Shader"; + case TGSI_PROCESSOR_FRAGMENT: + return "Pixel Shader"; + case TGSI_PROCESSOR_COMPUTE: + return "Compute Shader"; + default: + return "Unknown Shader"; + } +} + void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor) { - if (r600_can_dump_shader(&sscreen->b, processor)) - if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(&shader->binary, debug); + if (r600_can_dump_shader(&sscreen->b, processor) && + !(sscreen->b.debug_flags & DBG_NO_ASM)) { + fprintf(stderr, "\n%s:\n", si_get_shader_name(shader, processor)); + + if (shader->prolog) + si_shader_dump_disassembly(&shader->prolog->binary, + debug, "prolog"); + + si_shader_dump_disassembly(&shader->binary, debug, "main"); + + if (shader->epilog) + si_shader_dump_disassembly(&shader->epilog->binary, + debug, "epilog"); + fprintf(stderr, "\n"); + } si_shader_dump_stats(sscreen, &shader->config, shader->selector ? shader->selector->info.num_inputs : 0, - shader->binary.code_size, debug, processor); + si_get_shader_binary_size(shader), debug, processor); } int si_compile_llvm(struct si_screen *sscreen, @@ -4177,6 +4628,19 @@ int si_compile_llvm(struct si_screen *sscreen, FREE(binary->global_symbol_offsets); binary->config = NULL; binary->global_symbol_offsets = NULL; + + /* Some shaders can't have rodata because their binaries can be + * concatenated. 
+ */ + if (binary->rodata_size && + (processor == TGSI_PROCESSOR_VERTEX || + processor == TGSI_PROCESSOR_TESS_CTRL || + processor == TGSI_PROCESSOR_TESS_EVAL || + processor == TGSI_PROCESSOR_FRAGMENT)) { + fprintf(stderr, "radeonsi: The shader can't have rodata."); + return -EINVAL; + } + return r; } @@ -4196,7 +4660,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0])); - si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm, gsinfo); + si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm); ctx->type = TGSI_PROCESSOR_VERTEX; ctx->is_gs_copy_shader = true; @@ -4241,7 +4705,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); - LLVMBuildRetVoid(bld_base->base.gallivm->builder); + LLVMBuildRet(gallivm->builder, ctx->return_value); /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && @@ -4278,35 +4742,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) switch (shader) { case PIPE_SHADER_VERTEX: fprintf(f, " instance_divisors = {"); - for (i = 0; i < Elements(key->vs.instance_divisors); i++) + for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++) fprintf(f, !i ? 
"%u" : ", %u", - key->vs.instance_divisors[i]); + key->vs.prolog.instance_divisors[i]); fprintf(f, "}\n"); fprintf(f, " as_es = %u\n", key->vs.as_es); fprintf(f, " as_ls = %u\n", key->vs.as_ls); - fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id); break; case PIPE_SHADER_TESS_CTRL: - fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode); + fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode); break; case PIPE_SHADER_TESS_EVAL: fprintf(f, " as_es = %u\n", key->tes.as_es); - fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id); break; case PIPE_SHADER_GEOMETRY: break; case PIPE_SHADER_FRAGMENT: - fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format); - fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); - fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); - fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); - fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one); - fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple); - fprintf(f, " clamp_color = %u\n", key->ps.clamp_color); + fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side); + fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple); + fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp); + fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format); + fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8); + fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf); + fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func); + fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one); + fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing); + fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color); break; 
default: @@ -4317,13 +4784,12 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, - LLVMTargetMachineRef tm, - struct tgsi_shader_info *info) + LLVMTargetMachineRef tm) { struct lp_build_tgsi_context *bld_base; memset(ctx, 0, sizeof(*ctx)); - radeon_llvm_context_init(&ctx->radeon_bld); + radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--"); ctx->tm = tm; ctx->screen = sscreen; if (shader && shader->selector) @@ -4336,15 +4802,18 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context); ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context); ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context); - ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context); + ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context); + ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128); ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context); ctx->v16i8 = LLVMVectorType(ctx->i8, 16); + ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); bld_base = &ctx->radeon_bld.soa.bld_base; - bld_base->info = info; + if (shader && shader->selector) + bld_base->info = &shader->selector->info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; @@ -4380,40 +4849,31 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; } -int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, - struct si_shader *shader, - struct pipe_debug_callback *debug) +int si_compile_tgsi_shader(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + 
struct si_shader *shader, + bool is_monolithic, + struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; - struct tgsi_token *tokens = sel->tokens; struct si_shader_context ctx; struct lp_build_tgsi_context *bld_base; - struct tgsi_shader_info stipple_shader_info; LLVMModuleRef mod; int r = 0; - bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT && - shader->key.ps.poly_stipple; - - if (poly_stipple) { - tokens = util_pstipple_create_fragment_shader(tokens, NULL, - SI_POLY_STIPPLE_SAMPLER, - TGSI_FILE_SYSTEM_VALUE); - tgsi_scan_shader(tokens, &stipple_shader_info); - } /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ if (r600_can_dump_shader(&sscreen->b, sel->info.processor) && !(sscreen->b.debug_flags & DBG_NO_TGSI)) { si_dump_shader_key(sel->type, &shader->key, stderr); - tgsi_dump(tokens, 0); + tgsi_dump(sel->tokens, 0); si_dump_streamout(&sel->so); } - si_init_shader_ctx(&ctx, sscreen, shader, tm, - poly_stipple ? 
&stipple_shader_info : &sel->info); + si_init_shader_ctx(&ctx, sscreen, shader, tm); + ctx.is_monolithic = is_monolithic; - shader->uses_instanceid = sel->info.uses_instanceid; + shader->info.uses_instanceid = sel->info.uses_instanceid; bld_base = &ctx.radeon_bld.soa.bld_base; ctx.radeon_bld.load_system_value = declare_system_value; @@ -4447,7 +4907,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, break; case TGSI_PROCESSOR_FRAGMENT: ctx.radeon_bld.load_input = declare_input_fs; - bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; + if (is_monolithic) + bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; + else + bld_base->emit_epilogue = si_llvm_return_fs_outputs; break; default: assert(!"Unsupported shader type"); @@ -4461,6 +4924,14 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, preload_streamout_buffers(&ctx); preload_ring_buffers(&ctx); + if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && + shader->key.ps.prolog.poly_stipple) { + LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + si_llvm_emit_polygon_stipple(&ctx, views, + SI_PARAM_POS_FIXED_PT); + } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { int i; for (i = 0; i < 4; i++) { @@ -4470,12 +4941,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - if (!lp_build_tgsi_llvm(bld_base, tokens)) { + if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); goto out; } - LLVMBuildRetVoid(bld_base->base.gallivm->builder); + LLVMBuildRet(bld_base->base.gallivm->builder, ctx.return_value); mod = bld_base->base.gallivm->module; /* Dump LLVM IR before any optimization passes */ @@ -4492,16 +4963,49 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } - si_shader_dump(sscreen, shader, debug, ctx.type); + radeon_llvm_dispose(&ctx.radeon_bld); - r = si_shader_binary_upload(sscreen, shader); - if (r) { 
- fprintf(stderr, "LLVM failed to upload shader\n"); - goto out; + /* Calculate the number of fragment input VGPRs. */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { + shader->info.num_input_vgprs = 0; + shader->info.face_vgpr_index = -1; + + if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 3; + if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) { + shader->info.face_vgpr_index = shader->info.num_input_vgprs; + shader->info.num_input_vgprs += 1; + } + if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; } - 
radeon_llvm_dispose(&ctx.radeon_bld); - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { shader->gs_copy_shader = CALLOC_STRUCT(si_shader); shader->gs_copy_shader->selector = shader->selector; @@ -4517,11 +5021,968 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, out: for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++) FREE(ctx.constants[i]); - if (poly_stipple) - tgsi_free_tokens(tokens); return r; } +/** + * Create, compile and return a shader part (prolog or epilog). + * + * \param sscreen screen + * \param list list of shader parts of the same category + * \param key shader part key + * \param tm LLVM target machine + * \param debug debug callback + * \param compile the callback responsible for compilation + * \return non-NULL on success + */ +static struct si_shader_part * +si_get_shader_part(struct si_screen *sscreen, + struct si_shader_part **list, + union si_shader_part_key *key, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + bool (*compile)(struct si_screen *, + LLVMTargetMachineRef, + struct pipe_debug_callback *, + struct si_shader_part *)) +{ + struct si_shader_part *result; + + pipe_mutex_lock(sscreen->shader_parts_mutex); + + /* Find existing. */ + for (result = *list; result; result = result->next) { + if (memcmp(&result->key, key, sizeof(*key)) == 0) { + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return result; + } + } + + /* Compile a new one. */ + result = CALLOC_STRUCT(si_shader_part); + result->key = *key; + if (!compile(sscreen, tm, debug, result)) { + FREE(result); + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return NULL; + } + + result->next = *list; + *list = result; + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return result; +} + +/** + * Create a vertex shader prolog. + * + * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values). + * All inputs are returned unmodified. 
The vertex load indices are + * stored after them, which will used by the API VS for fetching inputs. + * + * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are: + * input_v0, + * input_v1, + * input_v2, + * input_v3, + * (VertexID + BaseVertex), + * (InstanceID + StartInstance), + * (InstanceID / 2 + StartInstance) + */ +static bool si_compile_vs_prolog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + LLVMTypeRef *params, *returns; + LLVMValueRef ret, func; + int last_sgpr, num_params, num_returns, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_VERTEX; + ctx.param_vertex_id = key->vs_prolog.num_input_sgprs; + ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3; + + /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ + params = alloca((key->vs_prolog.num_input_sgprs + 4) * + sizeof(LLVMTypeRef)); + returns = alloca((key->vs_prolog.num_input_sgprs + 4 + + key->vs_prolog.last_input + 1) * + sizeof(LLVMTypeRef)); + num_params = 0; + num_returns = 0; + + /* Declare input and output SGPRs. */ + num_params = 0; + for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { + params[num_params++] = ctx.i32; + returns[num_returns++] = ctx.i32; + } + last_sgpr = num_params - 1; + + /* 4 preloaded VGPRs (outputs must be floats) */ + for (i = 0; i < 4; i++) { + params[num_params++] = ctx.i32; + returns[num_returns++] = ctx.f32; + } + + /* Vertex load indices. */ + for (i = 0; i <= key->vs_prolog.last_input; i++) + returns[num_returns++] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, returns, num_returns, params, + num_params, -1, last_sgpr); + func = ctx.radeon_bld.main_fn; + + /* Copy inputs to outputs. 
This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx.return_value; + for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + for (i = num_params - 4; i < num_params; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + p = LLVMBuildBitCast(gallivm->builder, p, ctx.f32, ""); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + + /* Compute vertex load indices from instance divisors. */ + for (i = 0; i <= key->vs_prolog.last_input; i++) { + unsigned divisor = key->vs_prolog.states.instance_divisors[i]; + LLVMValueRef index; + + if (divisor) { + /* InstanceID / Divisor + StartInstance */ + index = get_instance_index_for_fetch(&ctx.radeon_bld, + SI_SGPR_START_INSTANCE, + divisor); + } else { + /* VertexID + BaseVertex */ + index = LLVMBuildAdd(gallivm->builder, + LLVMGetParam(func, ctx.param_vertex_id), + LLVMGetParam(func, SI_SGPR_BASE_VERTEX), ""); + } + + index = LLVMBuildBitCast(gallivm->builder, index, ctx.f32, ""); + ret = LLVMBuildInsertValue(gallivm->builder, ret, index, + num_params++, ""); + } + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ret); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Vertex Shader Prolog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Compile the vertex shader epilog. This is also used by the tessellation + * evaluation shader compiled as VS. + * + * The input is PrimitiveID. + * + * If PrimitiveID is required by the pixel shader, export it. + * Otherwise, do nothing. 
+ */ +static bool si_compile_vs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[5]; + int num_params, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, NULL, tm); + ctx.type = TGSI_PROCESSOR_VERTEX; + + /* Declare input VGPRs. */ + num_params = key->vs_epilog.states.export_prim_id ? + (VS_EPILOG_PRIMID_LOC + 1) : 0; + assert(num_params <= ARRAY_SIZE(params)); + + for (i = 0; i < num_params; i++) + params[i] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + -1, -1); + + /* Emit exports. */ + if (key->vs_epilog.states.export_prim_id) { + struct lp_build_context *base = &bld_base->base; + struct lp_build_context *uint = &bld_base->uint_bld; + LLVMValueRef args[9]; + + args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ + args[1] = uint->zero; /* whether the EXEC mask is valid */ + args[2] = uint->zero; /* DONE bit */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM + + key->vs_epilog.prim_id_param_offset); + args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ + args[5] = LLVMGetParam(ctx.radeon_bld.main_fn, + VS_EPILOG_PRIMID_LOC); /* X */ + args[6] = uint->undef; /* Y */ + args[7] = uint->undef; /* Z */ + args[8] = uint->undef; /* W */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + } + + /* Compile. 
*/ + LLVMBuildRet(gallivm->builder, ctx.return_value); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Vertex Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Create & compile a vertex shader epilog. This a helper used by VS and TES. + */ +static bool si_get_vs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug, + struct si_vs_epilog_bits *states) +{ + union si_shader_part_key epilog_key; + + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.vs_epilog.states = *states; + + /* Set up the PrimitiveID output. */ + if (shader->key.vs.epilog.export_prim_id) { + unsigned index = shader->selector->info.num_outputs; + unsigned offset = shader->info.nr_param_exports++; + + epilog_key.vs_epilog.prim_id_param_offset = offset; + assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[index] = offset; + } + + shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs, + &epilog_key, tm, debug, + si_compile_vs_epilog); + return shader->epilog != NULL; +} + +/** + * Select and compile (or reuse) vertex shader parts (prolog & epilog). + */ +static bool si_shader_select_vs_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct tgsi_shader_info *info = &shader->selector->info; + union si_shader_part_key prolog_key; + unsigned i; + + /* Get the prolog. */ + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.vs_prolog.states = shader->key.vs.prolog; + prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs; + prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; + + /* The prolog is a no-op if there are no inputs. 
*/ + if (info->num_inputs) { + shader->prolog = + si_get_shader_part(sscreen, &sscreen->vs_prologs, + &prolog_key, tm, debug, + si_compile_vs_prolog); + if (!shader->prolog) + return false; + } + + /* Get the epilog. */ + if (!shader->key.vs.as_es && !shader->key.vs.as_ls && + !si_get_vs_epilog(sscreen, tm, shader, debug, + &shader->key.vs.epilog)) + return false; + + /* Set the instanceID flag. */ + for (i = 0; i < info->num_inputs; i++) + if (prolog_key.vs_prolog.states.instance_divisors[i]) + shader->info.uses_instanceid = true; + + return true; +} + +/** + * Select and compile (or reuse) TES parts (epilog). + */ +static bool si_shader_select_tes_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + if (shader->key.tes.as_es) + return true; + + /* TES compiled as VS. */ + return si_get_vs_epilog(sscreen, tm, shader, debug, + &shader->key.tes.epilog); +} + +/** + * Compile the TCS epilog. This writes tesselation factors to memory based on + * the output primitive type of the tesselator (determined by TES). + */ +static bool si_compile_tcs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[16]; + LLVMValueRef func; + int last_array_pointer, last_sgpr, num_params; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_TESS_CTRL; + shader.key.tcs.epilog = key->tcs_epilog.states; + + /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. 
*/ + params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS); + last_array_pointer = SI_PARAM_RW_BUFFERS; + params[SI_PARAM_CONST_BUFFERS] = ctx.i64; + params[SI_PARAM_SAMPLERS] = ctx.i64; + params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; + params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; + params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; + params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32; + last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; + num_params = last_sgpr + 1; + + params[num_params++] = ctx.i32; /* patch index within the wave (REL_PATCH_ID) */ + params[num_params++] = ctx.i32; /* invocation ID within the patch */ + params[num_params++] = ctx.i32; /* LDS offset where tess factors should be loaded from */ + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + last_array_pointer, last_sgpr); + declare_tess_lds(&ctx); + func = ctx.radeon_bld.main_fn; + + si_write_tess_factors(bld_base, + LLVMGetParam(func, last_sgpr + 1), + LLVMGetParam(func, last_sgpr + 2), + LLVMGetParam(func, last_sgpr + 3)); + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ctx.return_value); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Tessellation Control Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Select and compile (or reuse) TCS parts (epilog). + */ +static bool si_shader_select_tcs_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + union si_shader_part_key epilog_key; + + /* Get the epilog. 
*/ + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.tcs_epilog.states = shader->key.tcs.epilog; + + shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, + &epilog_key, tm, debug, + si_compile_tcs_epilog); + return shader->epilog != NULL; +} + +/** + * Compile the pixel shader prolog. This handles: + * - two-side color selection and interpolation + * - overriding interpolation parameters for the API PS + * - polygon stippling + * + * All preloaded SGPRs and VGPRs are passed through unmodified unless they are + * overriden by other states. (e.g. per-sample interpolation) + * Interpolated colors are stored after the preloaded VGPRs. + */ +static bool si_compile_ps_prolog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + LLVMTypeRef *params; + LLVMValueRef ret, func; + int last_sgpr, num_params, num_returns, i, num_color_channels; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_FRAGMENT; + shader.key.ps.prolog = key->ps_prolog.states; + + /* Number of inputs + 8 color elements. */ + params = alloca((key->ps_prolog.num_input_sgprs + + key->ps_prolog.num_input_vgprs + 8) * + sizeof(LLVMTypeRef)); + + /* Declare inputs. */ + num_params = 0; + for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) + params[num_params++] = ctx.i32; + last_sgpr = num_params - 1; + + for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) + params[num_params++] = ctx.f32; + + /* Declare outputs (same as inputs + add colors if needed) */ + num_returns = num_params; + num_color_channels = util_bitcount(key->ps_prolog.colors_read); + for (i = 0; i < num_color_channels; i++) + params[num_returns++] = ctx.f32; + + /* Create the function. 
*/ + si_create_function(&ctx, params, num_returns, params, + num_params, -1, last_sgpr); + func = ctx.radeon_bld.main_fn; + + /* Copy inputs to outputs. This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx.return_value; + for (i = 0; i < num_params; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + + /* Polygon stippling. */ + if (key->ps_prolog.states.poly_stipple) { + /* POS_FIXED_PT is always last. */ + unsigned pos = key->ps_prolog.num_input_sgprs + + key->ps_prolog.num_input_vgprs - 1; + LLVMValueRef ptr[2], views; + + /* Get the pointer to sampler views. */ + ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS); + ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1); + views = lp_build_gather_values(gallivm, ptr, 2); + views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, ""); + views = LLVMBuildIntToPtr(gallivm->builder, views, + const_array(ctx.v8i32, SI_NUM_SAMPLERS), ""); + + si_llvm_emit_polygon_stipple(&ctx, views, pos); + } + + /* Interpolate colors. */ + for (i = 0; i < 2; i++) { + unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf; + unsigned face_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.face_vgpr_index; + LLVMValueRef interp[2], color[4]; + LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL; + + if (!writemask) + continue; + + /* If the interpolation qualifier is not CONSTANT (-1). */ + if (key->ps_prolog.color_interp_vgpr_index[i] != -1) { + unsigned interp_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.color_interp_vgpr_index[i]; + + interp[0] = LLVMGetParam(func, interp_vgpr); + interp[1] = LLVMGetParam(func, interp_vgpr + 1); + interp_ij = lp_build_gather_values(gallivm, interp, 2); + interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij, + ctx.v2i32, ""); + } + + /* Use the absolute location of the input. 
*/ + prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR); + + if (key->ps_prolog.states.color_two_side) { + face = LLVMGetParam(func, face_vgpr); + face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, ""); + } + + interp_fs_input(&ctx, + key->ps_prolog.color_attr_index[i], + TGSI_SEMANTIC_COLOR, i, + key->ps_prolog.num_interp_inputs, + key->ps_prolog.colors_read, interp_ij, + prim_mask, face, color); + + while (writemask) { + unsigned chan = u_bit_scan(&writemask); + ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan], + num_params++, ""); + } + } + + /* Force per-sample interpolation. */ + if (key->ps_prolog.states.force_persample_interp) { + unsigned i, base = key->ps_prolog.num_input_sgprs; + LLVMValueRef persp_sample[2], linear_sample[2]; + + /* Read PERSP_SAMPLE. */ + for (i = 0; i < 2; i++) + persp_sample[i] = LLVMGetParam(func, base + i); + /* Overwrite PERSP_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 2 + i, ""); + /* Overwrite PERSP_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 4 + i, ""); + /* Read LINEAR_SAMPLE. */ + for (i = 0; i < 2; i++) + linear_sample[i] = LLVMGetParam(func, base + 6 + i); + /* Overwrite LINEAR_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 8 + i, ""); + /* Overwrite LINEAR_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 10 + i, ""); + } + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ret); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Fragment Shader Prolog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Compile the pixel shader epilog. 
This handles everything that must be + * emulated for pixel shader exports. (alpha-test, format conversions, etc) + */ +static bool si_compile_ps_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[16+8*4+3]; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + int last_array_pointer, last_sgpr, num_params, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_FRAGMENT; + shader.key.ps.epilog = key->ps_epilog.states; + + /* Declare input SGPRs. */ + params[SI_PARAM_RW_BUFFERS] = ctx.i64; + params[SI_PARAM_CONST_BUFFERS] = ctx.i64; + params[SI_PARAM_SAMPLERS] = ctx.i64; + params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_ALPHA_REF] = ctx.f32; + last_array_pointer = -1; + last_sgpr = SI_PARAM_ALPHA_REF; + + /* Declare input VGPRs. */ + num_params = (last_sgpr + 1) + + util_bitcount(key->ps_epilog.colors_written) * 4 + + key->ps_epilog.writes_z + + key->ps_epilog.writes_stencil + + key->ps_epilog.writes_samplemask; + + num_params = MAX2(num_params, + last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); + + assert(num_params <= ARRAY_SIZE(params)); + + for (i = last_sgpr + 1; i < num_params; i++) + params[i] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + last_array_pointer, last_sgpr); + /* Disable elimination of unused inputs. */ + radeon_llvm_add_attribute(ctx.radeon_bld.main_fn, + "InitialPSInputAddr", 0xffffff); + + /* Process colors. */ + unsigned vgpr = last_sgpr + 1; + unsigned colors_written = key->ps_epilog.colors_written; + int last_color_export = -1; + + /* Find the last color export. 
*/ + if (!key->ps_epilog.writes_z && + !key->ps_epilog.writes_stencil && + !key->ps_epilog.writes_samplemask) { + unsigned spi_format = key->ps_epilog.states.spi_shader_col_format; + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) { + /* Just set this if any of the colorbuffers are enabled. */ + if (spi_format & + ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1)) + last_color_export = 0; + } else { + for (i = 0; i < 8; i++) + if (colors_written & (1 << i) && + (spi_format >> (i * 4)) & 0xf) + last_color_export = i; + } + } + + while (colors_written) { + LLVMValueRef color[4]; + int mrt = u_bit_scan(&colors_written); + + for (i = 0; i < 4; i++) + color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + + si_export_mrt_color(bld_base, color, mrt, + num_params - 1, + mrt == last_color_export); + } + + /* Process depth, stencil, samplemask. */ + if (key->ps_epilog.writes_z) + depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + if (key->ps_epilog.writes_stencil) + stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + if (key->ps_epilog.writes_samplemask) + samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + + if (depth || stencil || samplemask) + si_export_mrt_z(bld_base, depth, stencil, samplemask); + else if (last_color_export == -1) + si_export_null(bld_base); + + /* Compile. */ + LLVMBuildRetVoid(gallivm->builder); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Fragment Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Select and compile (or reuse) pixel shader parts (prolog & epilog). 
+ */ +static bool si_shader_select_ps_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct tgsi_shader_info *info = &shader->selector->info; + union si_shader_part_key prolog_key; + union si_shader_part_key epilog_key; + unsigned i; + + /* Get the prolog. */ + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.ps_prolog.states = shader->key.ps.prolog; + prolog_key.ps_prolog.colors_read = info->colors_read; + prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs; + prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs; + + if (info->colors_read) { + unsigned *color = shader->selector->color_attr_index; + + if (shader->key.ps.prolog.color_two_side) { + /* BCOLORs are stored after the last input. */ + prolog_key.ps_prolog.num_interp_inputs = info->num_inputs; + prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index; + shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1); + } + + for (i = 0; i < 2; i++) { + unsigned location = info->input_interpolate_loc[color[i]]; + + if (!(info->colors_read & (0xf << i*4))) + continue; + + prolog_key.ps_prolog.color_attr_index[i] = color[i]; + + /* Force per-sample interpolation for the colors here. 
*/ + if (shader->key.ps.prolog.force_persample_interp) + location = TGSI_INTERPOLATE_LOC_SAMPLE; + + switch (info->input_interpolate[color[i]]) { + case TGSI_INTERPOLATE_CONSTANT: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + case TGSI_INTERPOLATE_COLOR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + default: + assert(0); + } + } + } + + /* The prolog is a no-op if these aren't set. */ + if (prolog_key.ps_prolog.colors_read || + prolog_key.ps_prolog.states.force_persample_interp || + prolog_key.ps_prolog.states.poly_stipple) { + shader->prolog = + si_get_shader_part(sscreen, &sscreen->ps_prologs, + &prolog_key, tm, debug, + si_compile_ps_prolog); + if (!shader->prolog) + return false; + } + + /* Get the epilog. 
*/ + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.ps_epilog.colors_written = info->colors_written; + epilog_key.ps_epilog.writes_z = info->writes_z; + epilog_key.ps_epilog.writes_stencil = info->writes_stencil; + epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask; + epilog_key.ps_epilog.states = shader->key.ps.epilog; + + shader->epilog = + si_get_shader_part(sscreen, &sscreen->ps_epilogs, + &epilog_key, tm, debug, + si_compile_ps_epilog); + if (!shader->epilog) + return false; + + /* Enable POS_FIXED_PT if polygon stippling is enabled. */ + if (shader->key.ps.prolog.poly_stipple) { + shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1); + assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)); + } + + /* Set up the enable bits for per-sample shading if needed. */ + if (shader->key.ps.prolog.force_persample_interp) { + if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); + } + if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); + } + } + + /* POW_W_FLOAT requires that one of the perspective weights is enabled. */ + if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) && + !(shader->config.spi_ps_input_ena & 0xf)) { + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1); + assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + + /* At least one pair of interpolation weights must be enabled. 
*/ + if (!(shader->config.spi_ps_input_ena & 0x7f)) { + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1); + assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + + /* The sample mask input is always enabled, because the API shader always + * passes it through to the epilog. Disable it here if it's unused. + */ + if (!shader->key.ps.epilog.poly_line_smoothing && + !shader->selector->info.reads_samplemask) + shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA; + + return true; +} + +int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct si_shader *mainp = shader->selector->main_shader_part; + int r; + + /* LS and ES are always compiled on demand. */ + if (!mainp || + (shader->selector->type == PIPE_SHADER_VERTEX && + (shader->key.vs.as_es || shader->key.vs.as_ls)) || + (shader->selector->type == PIPE_SHADER_TESS_EVAL && + shader->key.tes.as_es)) { + /* Monolithic shader (compiled as a whole, has many variants, + * may take a long time to compile). + */ + r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug); + if (r) + return r; + } else { + /* The shader consists of 2-3 parts: + * + * - the middle part is the user shader, it has 1 variant only + * and it was compiled during the creation of the shader + * selector + * - the prolog part is inserted at the beginning + * - the epilog part is inserted at the end + * + * The prolog and epilog have many (but simple) variants. + */ + + /* Copy the compiled TGSI shader data over. 
*/ + shader->is_binary_shared = true; + shader->binary = mainp->binary; + shader->config = mainp->config; + shader->info.num_input_sgprs = mainp->info.num_input_sgprs; + shader->info.num_input_vgprs = mainp->info.num_input_vgprs; + shader->info.face_vgpr_index = mainp->info.face_vgpr_index; + memcpy(shader->info.vs_output_param_offset, + mainp->info.vs_output_param_offset, + sizeof(mainp->info.vs_output_param_offset)); + shader->info.uses_instanceid = mainp->info.uses_instanceid; + shader->info.nr_pos_exports = mainp->info.nr_pos_exports; + shader->info.nr_param_exports = mainp->info.nr_param_exports; + + /* Select prologs and/or epilogs. */ + switch (shader->selector->type) { + case PIPE_SHADER_VERTEX: + if (!si_shader_select_vs_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_TESS_CTRL: + if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_TESS_EVAL: + if (!si_shader_select_tes_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_FRAGMENT: + if (!si_shader_select_ps_parts(sscreen, tm, shader, debug)) + return -1; + + /* Make sure we have at least as many VGPRs as there + * are allocated inputs. + */ + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->info.num_input_vgprs); + break; + } + + /* Update SGPR and VGPR counts. */ + if (shader->prolog) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->prolog->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->prolog->config.num_vgprs); + } + if (shader->epilog) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->epilog->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->epilog->config.num_vgprs); + } + } + + si_shader_dump(sscreen, shader, debug, shader->selector->info.processor); + + /* Upload. 
*/ + r = si_shader_binary_upload(sscreen, shader); + if (r) { + fprintf(stderr, "LLVM failed to upload shader\n"); + return r; + } + + return 0; +} + void si_shader_destroy(struct si_shader *shader) { if (shader->gs_copy_shader) { @@ -4534,5 +5995,6 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(&shader->bo, NULL); - radeon_shader_binary_clean(&shader->binary); + if (!shader->is_binary_shared) + radeon_shader_binary_clean(&shader->binary); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index dc75e0330e4..ff5c24d8918 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -75,6 +75,8 @@ struct radeon_shader_binary; struct radeon_shader_reloc; +#define SI_MAX_VS_OUTPUTS 40 + #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ #define SI_SGPR_CONST_BUFFERS 2 #define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ @@ -169,7 +171,7 @@ struct radeon_shader_reloc; #define SI_PARAM_SAMPLE_COVERAGE 20 #define SI_PARAM_POS_FIXED_PT 21 -#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */ struct si_shader; @@ -181,6 +183,11 @@ struct si_shader_selector { struct si_shader *first_variant; /* immutable after the first variant */ struct si_shader *last_variant; /* mutable */ + /* The compiled TGSI shader expecting a prolog and/or epilog (not + * uploaded to a buffer). + */ + struct si_shader *main_shader_part; + struct tgsi_token *tokens; struct pipe_stream_output_info so; struct tgsi_shader_info info; @@ -199,6 +206,7 @@ struct si_shader_selector { unsigned max_gsvs_emit_size; /* PS parameters. */ + unsigned color_attr_index[2]; unsigned db_shader_control; /* Set 0xf or 0x0 (4 bits) per each written output. * ANDed with spi_shader_col_format. 
@@ -221,37 +229,103 @@ struct si_shader_selector { * With both: LS | HS | ES | GS | VS | PS */ +/* Common VS bits between the shader key and the prolog key. */ +struct si_vs_prolog_bits { + unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; +}; + +/* Common VS bits between the shader key and the epilog key. */ +struct si_vs_epilog_bits { + unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ + /* TODO: + * - skip clipdist, culldist (including clipvertex code) exports based + * on which clip_plane_enable bits are set + * - skip layer, viewport, clipdist, and culldist parameter exports + * if PS doesn't read them + */ +}; + +/* Common TCS bits between the shader key and the epilog key. */ +struct si_tcs_epilog_bits { + unsigned prim_mode:3; +}; + +/* Common PS bits between the shader key and the prolog key. */ +struct si_ps_prolog_bits { + unsigned color_two_side:1; + /* TODO: add a flatshade bit that skips interpolation for colors */ + unsigned poly_stipple:1; + unsigned force_persample_interp:1; + /* TODO: + * - add force_center_interp if MSAA is disabled and centroid or + * sample are present + * - add force_center_interp_bc_optimize to force center interpolation + * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is + * present and sample isn't present. + */ +}; + +/* Common PS bits between the shader key and the epilog key. 
*/ +struct si_ps_epilog_bits { + unsigned spi_shader_col_format; + unsigned color_is_int8:8; + unsigned last_cbuf:3; + unsigned alpha_func:3; + unsigned alpha_to_one:1; + unsigned poly_line_smoothing:1; + unsigned clamp_color:1; +}; + +union si_shader_part_key { + struct { + struct si_vs_prolog_bits states; + unsigned num_input_sgprs:5; + unsigned last_input:4; + } vs_prolog; + struct { + struct si_vs_epilog_bits states; + unsigned prim_id_param_offset:5; + } vs_epilog; + struct { + struct si_tcs_epilog_bits states; + } tcs_epilog; + struct { + struct si_ps_prolog_bits states; + unsigned num_input_sgprs:5; + unsigned num_input_vgprs:5; + /* Color interpolation and two-side color selection. */ + unsigned colors_read:8; /* color input components read */ + unsigned num_interp_inputs:5; /* BCOLOR is at this location */ + unsigned face_vgpr_index:5; + char color_attr_index[2]; + char color_interp_vgpr_index[2]; /* -1 == constant */ + } ps_prolog; + struct { + struct si_ps_epilog_bits states; + unsigned colors_written:8; + unsigned writes_z:1; + unsigned writes_stencil:1; + unsigned writes_samplemask:1; + } ps_epilog; +}; + union si_shader_key { struct { - unsigned spi_shader_col_format; - unsigned color_is_int8:8; - unsigned last_cbuf:3; - unsigned color_two_side:1; - unsigned alpha_func:3; - unsigned alpha_to_one:1; - unsigned poly_stipple:1; - unsigned poly_line_smoothing:1; - unsigned clamp_color:1; - unsigned force_persample_interp:1; + struct si_ps_prolog_bits prolog; + struct si_ps_epilog_bits epilog; } ps; struct { - unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; - /* Mask of "get_unique_index" bits - which outputs are read - * by the next stage (needed by ES). - * This describes how outputs are laid out in memory. 
*/ + struct si_vs_prolog_bits prolog; + struct si_vs_epilog_bits epilog; unsigned as_es:1; /* export shader */ unsigned as_ls:1; /* local shader */ - unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } vs; struct { - unsigned prim_mode:3; + struct si_tcs_epilog_bits epilog; } tcs; /* tessellation control shader */ struct { - /* Mask of "get_unique_index" bits - which outputs are read - * by the next stage (needed by ES). - * This describes how outputs are laid out in memory. */ + struct si_vs_epilog_bits epilog; /* same as VS */ unsigned as_es:1; /* export shader */ - unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } tes; /* tessellation evaluation shader */ }; @@ -267,22 +341,42 @@ struct si_shader_config { unsigned rsrc2; }; +/* GCN-specific shader info. */ +struct si_shader_info { + ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; + ubyte num_input_sgprs; + ubyte num_input_vgprs; + char face_vgpr_index; + bool uses_instanceid; + ubyte nr_pos_exports; + ubyte nr_param_exports; +}; + struct si_shader { struct si_shader_selector *selector; struct si_shader *next_variant; + struct si_shader_part *prolog; + struct si_shader_part *epilog; + struct si_shader *gs_copy_shader; struct si_pm4_state *pm4; struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; + bool is_binary_shared; + + /* The following data is all that's needed for binary shaders. 
*/ struct radeon_shader_binary binary; struct si_shader_config config; + struct si_shader_info info; +}; - unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; - bool uses_instanceid; - unsigned nr_pos_exports; - unsigned nr_param_exports; +struct si_shader_part { + struct si_shader_part *next; + union si_shader_part_key key; + struct radeon_shader_binary binary; + struct si_shader_config config; }; static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) @@ -310,14 +404,19 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) static inline bool si_vs_exports_prim_id(struct si_shader *shader) { if (shader->selector->type == PIPE_SHADER_VERTEX) - return shader->key.vs.export_prim_id; + return shader->key.vs.epilog.export_prim_id; else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) - return shader->key.tes.export_prim_id; + return shader->key.tes.epilog.export_prim_id; else return false; } -/* radeonsi_shader.c */ +/* si_shader.c */ +int si_compile_tgsi_shader(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + bool is_monolithic, + struct pipe_debug_callback *debug); int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bf780777b50..2dfdbeb8d8f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -277,7 +277,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a if (sctx->b.family == CHIP_STONEY) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? 
- sctx->ps_shader.current->key.ps.spi_shader_col_format : 0; + sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0; unsigned sx_ps_downconvert = 0; unsigned sx_blend_opt_epsilon = 0; unsigned sx_blend_opt_control = 0; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f64c4d45f1b..40792cbc1d5 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); +bool si_init_shader_cache(struct si_screen *sscreen); +void si_destroy_shader_cache(struct si_screen *sscreen); /* si_state_draw.c */ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 77a4e47c809..a6753a7a528 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -32,10 +32,221 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" +#include "util/hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_simple_shaders.h" +/* SHADER_CACHE */ + +/** + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as + * integer. + */ +static void *si_get_tgsi_binary(struct si_shader_selector *sel) +{ + unsigned tgsi_size = tgsi_num_tokens(sel->tokens) * + sizeof(struct tgsi_token); + unsigned size = 4 + tgsi_size + sizeof(sel->so); + char *result = (char*)MALLOC(size); + + if (!result) + return NULL; + + *((uint32_t*)result) = size; + memcpy(result + 4, sel->tokens, tgsi_size); + memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so)); + return result; +} + +/** Copy "data" to "ptr" and return the next dword following copied data. 
*/ +static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size) +{ + memcpy(ptr, data, size); + ptr += DIV_ROUND_UP(size, 4); + return ptr; +} + +/** Read data from "ptr". Return the next dword following the data. */ +static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size) +{ + memcpy(data, ptr, size); + ptr += DIV_ROUND_UP(size, 4); + return ptr; +} + +/** + * Write the size as uint followed by the data. Return the next dword + * following the copied data. + */ +static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size) +{ + *ptr++ = size; + return write_data(ptr, data, size); +} + +/** + * Read the size as uint followed by the data. Return both via parameters. + * Return the next dword following the data. + */ +static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size) +{ + *size = *ptr++; + assert(*data == NULL); + *data = malloc(*size); + return read_data(ptr, *data, *size); +} + +/** + * Return the shader binary in a buffer. The first 4 bytes contain its size + * as integer. + */ +static void *si_get_shader_binary(struct si_shader *shader) +{ + /* There is always a size of data followed by the data itself. */ + unsigned relocs_size = shader->binary.reloc_count * + sizeof(shader->binary.relocs[0]); + unsigned disasm_size = strlen(shader->binary.disasm_string) + 1; + unsigned size = + 4 + /* total size */ + 4 + /* CRC32 of the data below */ + align(sizeof(shader->config), 4) + + align(sizeof(shader->info), 4) + + 4 + align(shader->binary.code_size, 4) + + 4 + align(shader->binary.rodata_size, 4) + + 4 + align(relocs_size, 4) + + 4 + align(disasm_size, 4); + void *buffer = CALLOC(1, size); + uint32_t *ptr = (uint32_t*)buffer; + + if (!buffer) + return NULL; + + *ptr++ = size; + ptr++; /* CRC32 is calculated at the end. 
*/ + + ptr = write_data(ptr, &shader->config, sizeof(shader->config)); + ptr = write_data(ptr, &shader->info, sizeof(shader->info)); + ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size); + ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size); + ptr = write_chunk(ptr, shader->binary.relocs, relocs_size); + ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size); + assert((char *)ptr - (char *)buffer == size); + + /* Compute CRC32. */ + ptr = (uint32_t*)buffer; + ptr++; + *ptr = util_hash_crc32(ptr + 1, size - 8); + + return buffer; +} + +static bool si_load_shader_binary(struct si_shader *shader, void *binary) +{ + uint32_t *ptr = (uint32_t*)binary; + uint32_t size = *ptr++; + uint32_t crc32 = *ptr++; + unsigned chunk_size; + + if (util_hash_crc32(ptr, size - 8) != crc32) { + fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n"); + return false; + } + + ptr = read_data(ptr, &shader->config, sizeof(shader->config)); + ptr = read_data(ptr, &shader->info, sizeof(shader->info)); + ptr = read_chunk(ptr, (void**)&shader->binary.code, + &shader->binary.code_size); + ptr = read_chunk(ptr, (void**)&shader->binary.rodata, + &shader->binary.rodata_size); + ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size); + shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]); + ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size); + + return true; +} + +/** + * Insert a shader into the cache. It's assumed the shader is not in the cache. + * Use si_shader_cache_load_shader before calling this. + * + * Returns false on failure, in which case the tgsi_binary should be freed. 
+ */ +static bool si_shader_cache_insert_shader(struct si_screen *sscreen, + void *tgsi_binary, + struct si_shader *shader) +{ + void *hw_binary = si_get_shader_binary(shader); + + if (!hw_binary) + return false; + + if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary, + hw_binary) == NULL) { + FREE(hw_binary); + return false; + } + + return true; +} + +static bool si_shader_cache_load_shader(struct si_screen *sscreen, + void *tgsi_binary, + struct si_shader *shader) +{ + struct hash_entry *entry = + _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary); + if (!entry) + return false; + + return si_load_shader_binary(shader, entry->data); +} + +static uint32_t si_shader_cache_key_hash(const void *key) +{ + /* The first dword is the key size. */ + return util_hash_crc32(key, *(uint32_t*)key); +} + +static bool si_shader_cache_key_equals(const void *a, const void *b) +{ + uint32_t *keya = (uint32_t*)a; + uint32_t *keyb = (uint32_t*)b; + + /* The first dword is the key size. */ + if (*keya != *keyb) + return false; + + return memcmp(keya, keyb, *keya) == 0; +} + +static void si_destroy_shader_cache_entry(struct hash_entry *entry) +{ + FREE((void*)entry->key); + FREE(entry->data); +} + +bool si_init_shader_cache(struct si_screen *sscreen) +{ + pipe_mutex_init(sscreen->shader_cache_mutex); + sscreen->shader_cache = + _mesa_hash_table_create(NULL, + si_shader_cache_key_hash, + si_shader_cache_key_equals); + return sscreen->shader_cache != NULL; +} + +void si_destroy_shader_cache(struct si_screen *sscreen) +{ + if (sscreen->shader_cache) + _mesa_hash_table_destroy(sscreen->shader_cache, + si_destroy_shader_cache_entry); + pipe_mutex_destroy(sscreen->shader_cache_mutex); +} + +/* SHADER STATES */ + static void si_set_tesseval_regs(struct si_shader *shader, struct si_pm4_state *pm4) { @@ -108,7 +319,7 @@ static void si_shader_ls(struct si_shader *shader) /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). 
*/ - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; num_user_sgprs = SI_LS_NUM_USER_SGPR; num_sgprs = shader->config.num_sgprs; @@ -181,7 +392,7 @@ static void si_shader_es(struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; num_user_sgprs = SI_ES_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -347,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -363,19 +574,19 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) assert(num_sgprs <= 104); /* VS is required to export at least one param. */ - nparams = MAX2(shader->nr_param_exports, 1); + nparams = MAX2(shader->info.nr_param_exports, 1); si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1)); si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ? + S_02870C_POS1_EXPORT_FORMAT(shader->info.nr_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ? + S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ? 
V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ? + S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); @@ -415,7 +626,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) unsigned num_colors = !!(info->colors_read & 0x0f) + !!(info->colors_read & 0xf0); unsigned num_interp = ps->selector->info.num_inputs + - (ps->key.ps.color_two_side ? num_colors : 0); + (ps->key.ps.prolog.color_two_side ? num_colors : 0); assert(num_interp <= 32); return MIN2(num_interp, 32); @@ -423,7 +634,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) static unsigned si_get_spi_shader_col_format(struct si_shader *shader) { - unsigned value = shader->key.ps.spi_shader_col_format; + unsigned value = shader->key.ps.epilog.spi_shader_col_format; unsigned i, num_targets = (util_last_bit(value) + 3) / 4; /* If the i-th target format is set, all previous target formats must @@ -528,7 +739,7 @@ static void si_shader_ps(struct si_shader *shader) if (!spi_shader_col_format && !info->writes_z && !info->writes_stencil && !info->writes_samplemask && (shader->selector->info.uses_kill || - shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) + shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)) spi_shader_col_format = V_028714_SPI_SHADER_32_R; si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); @@ -638,11 +849,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, switch (sel->type) { case PIPE_SHADER_VERTEX: - if (sctx->vertex_elements) - for (i = 0; i < sctx->vertex_elements->count; ++i) - key->vs.instance_divisors[i] = + if (sctx->vertex_elements) { + unsigned count = MIN2(sel->info.num_inputs, + sctx->vertex_elements->count); + for (i = 0; i < count; ++i) + key->vs.prolog.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - + } if (sctx->tes_shader.cso) key->vs.as_ls = 1; else if 
(sctx->gs_shader.cso) @@ -650,17 +863,17 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, if (!sctx->gs_shader.cso && sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->vs.export_prim_id = 1; + key->vs.epilog.export_prim_id = 1; break; case PIPE_SHADER_TESS_CTRL: - key->tcs.prim_mode = + key->tcs.epilog.prim_mode = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; break; case PIPE_SHADER_TESS_EVAL: if (sctx->gs_shader.cso) key->tes.as_es = 1; else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->tes.export_prim_id = 1; + key->tes.epilog.export_prim_id = 1; break; case PIPE_SHADER_GEOMETRY: break; @@ -670,13 +883,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && sel->info.colors_written == 0x1) - key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; + key->ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; if (blend) { /* Select the shader color format based on whether * blending or alpha are needed. */ - key->ps.spi_shader_col_format = + key->ps.epilog.spi_shader_col_format = (blend->blend_enable_4bit & blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_blend_alpha) | (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & @@ -686,26 +899,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format); } else - key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; + key->ps.epilog.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; /* If alpha-to-coverage is enabled, we have to export alpha * even if there is no color buffer. 
*/ - if (!(key->ps.spi_shader_col_format & 0xf) && + if (!(key->ps.epilog.spi_shader_col_format & 0xf) && blend && blend->alpha_to_coverage) - key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; + key->ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; /* On SI and CIK except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) - key->ps.color_is_int8 = sctx->framebuffer.color_is_int8; + key->ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ - if (!key->ps.last_cbuf) { - key->ps.spi_shader_col_format &= sel->colors_written_4bit; - key->ps.color_is_int8 &= sel->info.colors_written; + if (!key->ps.epilog.last_cbuf) { + key->ps.epilog.spi_shader_col_format &= sel->colors_written_4bit; + key->ps.epilog.color_is_int8 &= sel->info.colors_written; } if (rs) { @@ -714,31 +927,32 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY; bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS; - key->ps.color_two_side = rs->two_side && sel->info.colors_read; + key->ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; if (sctx->queued.named.blend) { - key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && - rs->multisample_enable && - !sctx->framebuffer.cb0_is_integer; + key->ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one && + rs->multisample_enable && + !sctx->framebuffer.cb0_is_integer; } - key->ps.poly_stipple = rs->poly_stipple_enable && is_poly; - key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) || - (is_line && rs->line_smooth)) && - sctx->framebuffer.nr_samples <= 1; - key->ps.clamp_color = rs->clamp_fragment_color; - - key->ps.force_persample_interp = rs->force_persample_interp && - 
rs->multisample_enable && - sctx->framebuffer.nr_samples > 1 && - sctx->ps_iter_samples > 1 && - (sel->info.uses_persp_center || - sel->info.uses_persp_centroid || - sel->info.uses_linear_center || - sel->info.uses_linear_centroid); + key->ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; + key->ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) || + (is_line && rs->line_smooth)) && + sctx->framebuffer.nr_samples <= 1; + key->ps.epilog.clamp_color = rs->clamp_fragment_color; + + key->ps.prolog.force_persample_interp = + rs->force_persample_interp && + rs->multisample_enable && + sctx->framebuffer.nr_samples > 1 && + sctx->ps_iter_samples > 1 && + (sel->info.uses_persp_center || + sel->info.uses_persp_centroid || + sel->info.uses_linear_center || + sel->info.uses_linear_centroid); } - key->ps.alpha_func = si_get_alpha_test_func(sctx); + key->ps.epilog.alpha_func = si_get_alpha_test_func(sctx); break; } default: @@ -821,6 +1035,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, const struct pipe_shader_state *state) { struct si_screen *sscreen = (struct si_screen *)ctx->screen; + struct si_context *sctx = (struct si_context*)ctx; struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; @@ -900,6 +1115,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx, for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) sel->colors_written_4bit |= 0xf << (4 * i); + + for (i = 0; i < sel->info.num_inputs; i++) { + if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + int index = sel->info.input_semantic_index[i]; + sel->color_attr_index[index] = i; + } + } break; } @@ -921,6 +1143,44 @@ static void *si_create_shader_selector(struct pipe_context *ctx, break; } + /* Compile the main shader part for use with a prolog and/or epilog. 
*/ + if (sel->type != PIPE_SHADER_GEOMETRY && + !sscreen->use_monolithic_shaders) { + struct si_shader *shader = CALLOC_STRUCT(si_shader); + void *tgsi_binary; + + if (!shader) + goto error; + + shader->selector = sel; + + tgsi_binary = si_get_tgsi_binary(sel); + + /* Try to load the shader from the shader cache. */ + pipe_mutex_lock(sscreen->shader_cache_mutex); + + if (tgsi_binary && + si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) { + FREE(tgsi_binary); + } else { + /* Compile the shader if it hasn't been loaded from the cache. */ + if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false, + &sctx->b.debug) != 0) { + FREE(shader); + FREE(tgsi_binary); + pipe_mutex_unlock(sscreen->shader_cache_mutex); + goto error; + } + + if (tgsi_binary && + !si_shader_cache_insert_shader(sscreen, tgsi_binary, shader)) + FREE(tgsi_binary); + } + pipe_mutex_unlock(sscreen->shader_cache_mutex); + + sel->main_shader_part = shader; + } + /* Pre-compilation. */ if (sel->type == PIPE_SHADER_GEOMETRY || sscreen->b.debug_flags & DBG_PRECOMPILE) { @@ -934,27 +1194,29 @@ static void *si_create_shader_selector(struct pipe_context *ctx, */ switch (sel->type) { case PIPE_SHADER_TESS_CTRL: - key.tcs.prim_mode = PIPE_PRIM_TRIANGLES; + key.tcs.epilog.prim_mode = PIPE_PRIM_TRIANGLES; break; case PIPE_SHADER_FRAGMENT: - key.ps.alpha_func = PIPE_FUNC_ALWAYS; + key.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS; for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) - key.ps.spi_shader_col_format |= + key.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_FP16_ABGR << (i * 4); break; } - if (si_shader_select_with_key(ctx, &state, &key)) { - fprintf(stderr, "radeonsi: can't create a shader\n"); - tgsi_free_tokens(sel->tokens); - FREE(sel); - return NULL; - } + if (si_shader_select_with_key(ctx, &state, &key)) + goto error; } pipe_mutex_init(sel->mutex); return sel; + +error: + fprintf(stderr, "radeonsi: can't create a shader\n"); + tgsi_free_tokens(sel->tokens); + FREE(sel); + 
return NULL; } /** @@ -1119,6 +1381,9 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) p = c; } + if (sel->main_shader_part) + si_delete_shader(sctx, sel->main_shader_part); + pipe_mutex_destroy(sel->mutex); free(sel->tokens); free(sel); @@ -1144,14 +1409,14 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, for (j = 0; j < vsinfo->num_outputs; j++) { if (name == vsinfo->output_semantic_name[j] && index == vsinfo->output_semantic_index[j]) { - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]); break; } } if (name == TGSI_SEMANTIC_PRIMID) /* PrimID is written after the last output. */ - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { /* No corresponding output found, load defaults into input. * Don't set any other bits. 
@@ -1191,7 +1456,7 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) } } - if (ps->key.ps.color_two_side) { + if (ps->key.ps.prolog.color_two_side) { unsigned bcol = TGSI_SEMANTIC_BCOLOR; for (i = 0; i < 2; i++) { @@ -1745,8 +2010,8 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); if (sctx->b.chip_class == SI) diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 80526ed4d15..fe6cf71a6e5 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -590,6 +590,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, } else { /* non-indexed drawing */ + if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID) { + /* Unbind previously bound index buffer */ + ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL, + SVGA3D_FORMAT_INVALID, 0); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID; + svga->state.hw_draw.ib = NULL; + } + if (instance_count > 1) { ret = SVGA3D_vgpu10_DrawInstanced(svga->swc, vcount, diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index d5405f8eacf..c9abd49ec1e 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -468,12 +468,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case 
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -527,12 +530,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -619,12 +625,15 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader, return SVGA3D_DX_MAX_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 1223e446055..0c5afeb4cf9 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -1782,7 +1782,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, static unsigned alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) { - const unsigned n = emit->info.num_inputs + index; + const 
unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; assert(index < Elements(emit->system_value_indexes)); emit->system_value_indexes[index] = n; return n; @@ -2446,7 +2446,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit) else { assert(emit->unit == PIPE_SHADER_VERTEX); - for (i = 0; i < emit->info.num_inputs; i++) { + for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { unsigned usage_mask = emit->info.input_usage_mask[i]; unsigned index = i; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 2ce2b3aef75..57f851833e5 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -50,7 +50,8 @@ struct trace_query static inline struct trace_query * -trace_query(struct pipe_query *query) { +trace_query(struct pipe_query *query) +{ return (struct trace_query *)query; } @@ -93,7 +94,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, return NULL; assert(surface->texture); - if(!surface->texture) + if (!surface->texture) return surface; tr_surf = trace_surface(surface); @@ -105,7 +106,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, } -static inline void +static void trace_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { @@ -135,7 +136,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, } -static inline struct pipe_query * +static struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type, unsigned index) @@ -173,7 +174,7 @@ trace_context_create_query(struct pipe_context *_pipe, } -static inline void +static void trace_context_destroy_query(struct pipe_context *_pipe, struct pipe_query *_query) { @@ -195,7 +196,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, } -static inline boolean +static boolean trace_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { @@ -217,7 +218,7 @@ trace_context_begin_query(struct pipe_context *_pipe, 
} -static inline void +static void trace_context_end_query(struct pipe_context *_pipe, struct pipe_query *query) { @@ -237,7 +238,7 @@ trace_context_end_query(struct pipe_context *_pipe, } -static inline boolean +static boolean trace_context_get_query_result(struct pipe_context *_pipe, struct pipe_query *_query, boolean wait, @@ -272,7 +273,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_blend_state(struct pipe_context *_pipe, const struct pipe_blend_state *state) { @@ -295,7 +296,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_blend_state(struct pipe_context *_pipe, void *state) { @@ -313,7 +314,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_blend_state(struct pipe_context *_pipe, void *state) { @@ -331,7 +332,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_sampler_state(struct pipe_context *_pipe, const struct pipe_sampler_state *state) { @@ -354,7 +355,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_sampler_states(struct pipe_context *_pipe, unsigned shader, unsigned start, @@ -381,7 +382,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_sampler_state(struct pipe_context *_pipe, void *state) { @@ -399,7 +400,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_rasterizer_state(struct pipe_context *_pipe, const struct pipe_rasterizer_state *state) { @@ -422,7 +423,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_rasterizer_state(struct pipe_context *_pipe, void *state) { @@ 
-440,7 +441,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_rasterizer_state(struct pipe_context *_pipe, void *state) { @@ -458,7 +459,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, const struct pipe_depth_stencil_alpha_state *state) { @@ -481,7 +482,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { @@ -499,7 +500,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { @@ -518,7 +519,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, #define TRACE_SHADER_STATE(shader_type) \ - static inline void * \ + static void * \ trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \ const struct pipe_shader_state *state) \ { \ @@ -534,7 +535,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, return result; \ } \ \ - static inline void \ + static void \ trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \ void *state) \ { \ @@ -547,7 +548,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_call_end(); \ } \ \ - static inline void \ + static void \ trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \ void *state) \ { \ @@ -570,6 +571,51 @@ TRACE_SHADER_STATE(tes) static inline void * +trace_context_create_compute_state(struct pipe_context *_pipe, + const struct pipe_compute_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + 
trace_dump_call_begin("pipe_context", "create_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(compute_state, state); + result = pipe->create_compute_state(pipe, state); + trace_dump_ret(ptr, result); + trace_dump_call_end(); + return result; +} + +static inline void +trace_context_bind_compute_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + pipe->bind_compute_state(pipe, state); + trace_dump_call_end(); +} + +static inline void +trace_context_delete_compute_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + pipe->delete_compute_state(pipe, state); + trace_dump_call_end(); +} + +static void * trace_context_create_vertex_elements_state(struct pipe_context *_pipe, unsigned num_elements, const struct pipe_vertex_element *elements) @@ -597,7 +643,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_vertex_elements_state(struct pipe_context *_pipe, void *state) { @@ -615,7 +661,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, void *state) { @@ -633,7 +679,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *state) { @@ -651,7 +697,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, } -static inline void +static void 
trace_context_set_stencil_ref(struct pipe_context *_pipe, const struct pipe_stencil_ref *state) { @@ -669,7 +715,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_clip_state(struct pipe_context *_pipe, const struct pipe_clip_state *state) { @@ -686,7 +732,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_set_sample_mask(struct pipe_context *_pipe, unsigned sample_mask) { @@ -703,7 +749,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, struct pipe_constant_buffer *constant_buffer) @@ -731,7 +777,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_framebuffer_state(struct pipe_context *_pipe, const struct pipe_framebuffer_state *state) { @@ -743,9 +789,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, /* Unwrap the input state */ memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); - for(i = 0; i < state->nr_cbufs; ++i) + for (i = 0; i < state->nr_cbufs; ++i) unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); - for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + for (i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) unwrapped_state.cbufs[i] = NULL; unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); state = &unwrapped_state; @@ -761,7 +807,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_polygon_stipple(struct pipe_context *_pipe, const struct pipe_poly_stipple *state) { @@ -779,7 +825,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_scissor_states(struct pipe_context *_pipe, unsigned 
start_slot, unsigned num_scissors, @@ -801,7 +847,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_viewport_states(struct pipe_context *_pipe, unsigned start_slot, unsigned num_viewports, @@ -825,8 +871,8 @@ trace_context_set_viewport_states(struct pipe_context *_pipe, static struct pipe_sampler_view * trace_context_create_sampler_view(struct pipe_context *_pipe, - struct pipe_resource *_resource, - const struct pipe_sampler_view *templ) + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_resource *tr_res = trace_resource(_resource); @@ -868,7 +914,7 @@ trace_context_create_sampler_view(struct pipe_context *_pipe, static void trace_context_sampler_view_destroy(struct pipe_context *_pipe, - struct pipe_sampler_view *_view) + struct pipe_sampler_view *_view) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_sampler_view *tr_view = trace_sampler_view(_view); @@ -910,7 +956,7 @@ trace_context_create_surface(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, resource); - + trace_dump_arg_begin("surf_tmpl"); trace_dump_surface_template(surf_tmpl, resource->target); trace_dump_arg_end(); @@ -948,7 +994,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader, unsigned start, @@ -964,7 +1010,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, /* remove this when we have pipe->set_sampler_views(..., start, ...) */ assert(start == 0); - for(i = 0; i < num; ++i) { + for (i = 0; i < num; ++i) { tr_view = trace_sampler_view(views[i]); unwrapped_views[i] = tr_view ? 
tr_view->sampler_view : NULL; } @@ -984,7 +1030,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_vertex_buffers(struct pipe_context *_pipe, unsigned start_slot, unsigned num_buffers, const struct pipe_vertex_buffer *buffers) @@ -1018,7 +1064,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_index_buffer(struct pipe_context *_pipe, const struct pipe_index_buffer *ib) { @@ -1043,7 +1089,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe, } -static inline struct pipe_stream_output_target * +static struct pipe_stream_output_target * trace_context_create_stream_output_target(struct pipe_context *_pipe, struct pipe_resource *res, unsigned buffer_offset, @@ -1073,7 +1119,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe, } -static inline void +static void trace_context_stream_output_target_destroy( struct pipe_context *_pipe, struct pipe_stream_output_target *target) @@ -1092,7 +1138,7 @@ trace_context_stream_output_target_destroy( } -static inline void +static void trace_context_set_stream_output_targets(struct pipe_context *_pipe, unsigned num_targets, struct pipe_stream_output_target **tgs, @@ -1114,7 +1160,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe, } -static inline void +static void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, unsigned dst_level, @@ -1149,7 +1195,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe, } -static inline void +static void trace_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *_info) { @@ -1191,7 +1237,7 @@ trace_context_flush_resource(struct pipe_context *_pipe, } -static inline void +static void trace_context_clear(struct pipe_context *_pipe, unsigned buffers, const union pipe_color_union *color, @@ -1220,7 +1266,7 @@ trace_context_clear(struct pipe_context 
*_pipe, } -static inline void +static void trace_context_clear_render_target(struct pipe_context *_pipe, struct pipe_surface *dst, const union pipe_color_union *color, @@ -1247,7 +1293,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_clear_depth_stencil(struct pipe_context *_pipe, struct pipe_surface *dst, unsigned clear_flags, @@ -1306,7 +1352,7 @@ trace_context_clear_texture(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, unsigned flags) @@ -1364,7 +1410,7 @@ trace_context_generate_mipmap(struct pipe_context *_pipe, } -static inline void +static void trace_context_destroy(struct pipe_context *_pipe) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -1414,7 +1460,7 @@ trace_context_transfer_map(struct pipe_context *_context, *transfer = trace_transfer_create(tr_context, tr_res, result); if (map) { - if(usage & PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_WRITE) { trace_transfer(*transfer)->map = map; } } @@ -1432,9 +1478,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context, struct pipe_context *context = tr_context->pipe; struct pipe_transfer *transfer = tr_transfer->transfer; - context->transfer_flush_region(context, - transfer, - box); + context->transfer_flush_region(context, transfer, box); } static void @@ -1446,7 +1490,7 @@ trace_context_transfer_unmap(struct pipe_context *_context, struct pipe_context *context = tr_ctx->pipe; struct pipe_transfer *transfer = tr_trans->transfer; - if(tr_trans->map) { + if (tr_trans->map) { /* * Fake a transfer_inline_write */ @@ -1525,15 +1569,16 @@ trace_context_transfer_inline_write(struct pipe_context *_context, trace_dump_call_end(); - context->transfer_inline_write(context, resource, - level, usage, box, data, stride, layer_stride); + context->transfer_inline_write(context, 
resource, level, usage, box, + data, stride, layer_stride); } -static void trace_context_render_condition(struct pipe_context *_context, - struct pipe_query *query, - boolean condition, - uint mode) +static void +trace_context_render_condition(struct pipe_context *_context, + struct pipe_query *query, + boolean condition, + uint mode) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1553,7 +1598,8 @@ static void trace_context_render_condition(struct pipe_context *_context, } -static void trace_context_texture_barrier(struct pipe_context *_context) +static void +trace_context_texture_barrier(struct pipe_context *_context) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1568,8 +1614,9 @@ static void trace_context_texture_barrier(struct pipe_context *_context) } -static void trace_context_memory_barrier(struct pipe_context *_context, - unsigned flags) +static void +trace_context_memory_barrier(struct pipe_context *_context, + unsigned flags) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1583,9 +1630,10 @@ static void trace_context_memory_barrier(struct pipe_context *_context, } -static void trace_context_set_tess_state(struct pipe_context *_context, - const float default_outer_level[4], - const float default_inner_level[2]) +static void +trace_context_set_tess_state(struct pipe_context *_context, + const float default_outer_level[4], + const float default_inner_level[2]) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1638,12 +1686,31 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context, FREE(_buffers); } +static void trace_context_launch_grid(struct pipe_context *_pipe, + const struct pipe_grid_info *info) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct 
pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "launch_grid"); -static const struct debug_named_value rbug_blocker_flags[] = { - {"before", 1, NULL}, - {"after", 2, NULL}, - DEBUG_NAMED_VALUE_END -}; + trace_dump_arg(ptr, pipe); + trace_dump_arg(grid_info, info); + + trace_dump_trace_flush(); + + if (info->indirect) { + struct pipe_grid_info _info; + + memcpy(&_info, info, sizeof(_info)); + _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect); + pipe->launch_grid(pipe, &_info); + } else { + pipe->launch_grid(pipe, info); + } + + trace_dump_call_end(); +} struct pipe_context * trace_context_create(struct trace_screen *tr_scr, @@ -1654,7 +1721,7 @@ trace_context_create(struct trace_screen *tr_scr, if (!pipe) goto error1; - if(!trace_enabled()) + if (!trace_enabled()) goto error1; tr_ctx = CALLOC_STRUCT(trace_context); @@ -1703,6 +1770,9 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(create_tes_state); TR_CTX_INIT(bind_tes_state); TR_CTX_INIT(delete_tes_state); + TR_CTX_INIT(create_compute_state); + TR_CTX_INIT(bind_compute_state); + TR_CTX_INIT(delete_compute_state); TR_CTX_INIT(create_vertex_elements_state); TR_CTX_INIT(bind_vertex_elements_state); TR_CTX_INIT(delete_vertex_elements_state); @@ -1738,6 +1808,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(memory_barrier); TR_CTX_INIT(set_tess_state); TR_CTX_INIT(set_shader_buffers); + TR_CTX_INIT(launch_grid); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); @@ -1756,7 +1827,7 @@ error1: /** - * Sanity checker: check that the given context really is a + * Sanity checker: check that the given context really is a * trace context (and not the wrapped driver's context). 
*/ void @@ -1765,4 +1836,3 @@ trace_context_check(const struct pipe_context *pipe) struct trace_context *tr_ctx = (struct trace_context *) pipe; assert(tr_ctx->base.destroy == trace_context_destroy); } - diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index cfbf53cf767..0627e5ab5d7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -305,6 +305,36 @@ void trace_dump_shader_state(const struct pipe_shader_state *state) } +void trace_dump_compute_state(const struct pipe_compute_state *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if (!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_compute_state"); + + trace_dump_member_begin("prog"); + if (state->prog) { + static char str[64 * 1024]; + tgsi_dump_str(state->prog, 0, str, sizeof(str)); + trace_dump_string(str); + } else { + trace_dump_null(); + } + trace_dump_member_end(); + + trace_dump_member(uint, state, req_local_mem); + trace_dump_member(uint, state, req_private_mem); + trace_dump_member(uint, state, req_input_mem); + + trace_dump_struct_end(); +} + + void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) { unsigned i; @@ -864,3 +894,33 @@ trace_dump_query_result(unsigned query_type, break; } } + +void trace_dump_grid_info(const struct pipe_grid_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if (!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_grid_info"); + + trace_dump_member(uint, state, pc); + trace_dump_member(ptr, state, input); + + trace_dump_member_begin("block"); + trace_dump_array(uint, state->block, Elements(state->block)); + trace_dump_member_end(); + + trace_dump_member_begin("grid"); + trace_dump_array(uint, state->grid, Elements(state->grid)); + trace_dump_member_end(); + + trace_dump_member(ptr, state, indirect); + trace_dump_member(uint, state, 
indirect_offset); + + trace_dump_struct_end(); +} + diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 4f4ade155bc..ee0720d8ac8 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -50,6 +50,8 @@ void trace_dump_token(const struct tgsi_token *token); void trace_dump_shader_state(const struct pipe_shader_state *state); +void trace_dump_compute_state(const struct pipe_compute_state *state); + void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); void trace_dump_blend_state(const struct pipe_blend_state *state); @@ -87,4 +89,6 @@ void trace_dump_blit_info(const struct pipe_blit_info *); void trace_dump_query_result(unsigned query_type, const union pipe_query_result *result); +void trace_dump_grid_info(const struct pipe_grid_info *state); + #endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ff057e2a4a4..0612109c800 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -173,6 +173,30 @@ trace_screen_get_paramf(struct pipe_screen *_screen, } +static int +trace_screen_get_compute_param(struct pipe_screen *_screen, + enum pipe_compute_cap param, void *data) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + int result; + + trace_dump_call_begin("pipe_screen", "get_compute_param"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + trace_dump_arg(ptr, data); + + result = screen->get_compute_param(screen, param, data); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + static boolean trace_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, @@ -472,6 +496,7 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.get_param = trace_screen_get_param; tr_scr->base.get_shader_param = 
trace_screen_get_shader_param; tr_scr->base.get_paramf = trace_screen_get_paramf; + tr_scr->base.get_compute_param = trace_screen_get_compute_param; tr_scr->base.is_format_supported = trace_screen_is_format_supported; assert(screen->context_create); tr_scr->base.context_create = trace_screen_context_create; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index efbb69b71a7..f9eb0e151c5 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -193,6 +193,7 @@ qir_is_raw_mov(struct qinst *inst) return ((inst->op == QOP_MOV || inst->op == QOP_FMOV || inst->op == QOP_MMOV) && + inst->cond == QPU_COND_ALWAYS && !inst->dst.pack && !inst->src[0].pack); } diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index 2f280c54523..ee1e9aafbb9 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -575,7 +575,7 @@ void qir_schedule_instructions(struct vc4_compile *c) { void *mem_ctx = ralloc_context(NULL); - struct schedule_state state = { 0 }; + struct schedule_state state = { { 0 } }; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index b06702afea2..450b97fc014 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -312,7 +312,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; } - bool handled_qinst_cond = true; + bool handled_qinst_cond = false; switch (qinst->op) { case QOP_RCP: diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index b19d31af6ac..a4b3efcfda3 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -357,9 +357,12 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return VC4_MAX_TEXTURE_SAMPLERS; case 
PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; default: fprintf(stderr, "unknown shader param %d\n", param); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 6c95b7b2178..ee68fdd6f6f 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -48,6 +48,7 @@ struct pipe_constant_buffer; struct pipe_debug_callback; struct pipe_depth_stencil_alpha_state; struct pipe_draw_info; +struct pipe_grid_info; struct pipe_fence_handle; struct pipe_framebuffer_state; struct pipe_image_view; @@ -312,14 +313,14 @@ struct pipe_context { * \param shader selects shader stage * \param start_slot first image slot to bind. * \param count number of consecutive images to bind. - * \param buffers array of pointers to the images to bind, it + * \param buffers array of the images to bind, it * should contain at least \a count elements * unless it's NULL, in which case no images will * be bound. */ void (*set_shader_images)(struct pipe_context *, unsigned shader, unsigned start_slot, unsigned count, - struct pipe_image_view **images); + struct pipe_image_view *images); void (*set_vertex_buffers)( struct pipe_context *, unsigned start_slot, @@ -477,16 +478,6 @@ struct pipe_context { void (*surface_destroy)(struct pipe_context *ctx, struct pipe_surface *); - /** - * Create an image view into a buffer or texture to be used with load, - * store, and atomic instructions by a shader stage. - */ - struct pipe_image_view * (*create_image_view)(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_image_view *templat); - - void (*image_view_destroy)(struct pipe_context *ctx, - struct pipe_image_view *view); /** * Map a resource. 
@@ -618,23 +609,9 @@ struct pipe_context { /** * Launch the compute kernel starting from instruction \a pc of the * currently bound compute program. - * - * \a grid_layout and \a block_layout are arrays of size \a - * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the - * grid (in block units) and working block (in thread units) to be - * used, respectively. - * - * \a pc For drivers that use PIPE_SHADER_IR_LLVM as their prefered IR, - * this value will be the index of the kernel in the opencl.kernels - * metadata list. - * - * \a input will be used to initialize the INPUT resource, and it - * should point to a buffer of at least - * pipe_compute_state::req_input_mem bytes. */ void (*launch_grid)(struct pipe_context *context, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input); + const struct pipe_grid_info *info); /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index b01f6ea3dcb..010be62e638 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -725,6 +725,8 @@ enum pipe_shader_cap PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE, PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS, + PIPE_SHADER_CAP_SUPPORTED_IRS, + PIPE_SHADER_CAP_MAX_SHADER_IMAGES, }; /** diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 6539017b77c..9d4a96a5a7e 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -79,6 +79,7 @@ enum tgsi_file_type { TGSI_FILE_IMAGE =10, TGSI_FILE_SAMPLER_VIEW =11, TGSI_FILE_BUFFER =12, + TGSI_FILE_MEMORY =13, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -129,7 +130,8 @@ struct tgsi_declaration unsigned Local : 1; /**< optimize as subroutine local variable? */ unsigned Array : 1; /**< extra array info? */ unsigned Atomic : 1; /**< atomic only? 
for TGSI_FILE_BUFFER */ - unsigned Padding : 5; + unsigned Shared : 1; /**< shared storage for TGSI_FILE_MEMORY */ + unsigned Padding : 4; }; struct tgsi_declaration_range diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index ed62a33ad72..c568c483940 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -393,14 +393,12 @@ struct pipe_sampler_view /** - * A view into a writable buffer or texture that can be bound to a shader + * A description of a writable buffer or texture that can be bound to a shader * stage. */ struct pipe_image_view { - struct pipe_reference reference; struct pipe_resource *resource; /**< resource into which this is a view */ - struct pipe_context *context; /**< context this view belongs to */ enum pipe_format format; /**< typed PIPE_FORMAT_x */ union { @@ -678,6 +676,45 @@ struct pipe_blit_info boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */ }; +/** + * Information to describe a launch_grid call. + */ +struct pipe_grid_info +{ + /** + * For drivers that use PIPE_SHADER_IR_LLVM as their prefered IR, this value + * will be the index of the kernel in the opencl.kernels metadata list. + */ + uint32_t pc; + + /** + * Will be used to initialize the INPUT resource, and it should point to a + * buffer of at least pipe_compute_state::req_input_mem bytes. + */ + void *input; + + /** + * Determine the layout of the working block (in thread units) to be used. + */ + uint block[3]; + + /** + * Determine the layout of the grid (in block units) to be used. 
+ */ + uint grid[3]; + + /* Indirect compute parameters resource: If not NULL, block sizes are taken + * from this buffer instead, which is laid out as follows: + * + * struct { + * uint32_t num_blocks_x; + * uint32_t num_blocks_y; + * uint32_t num_blocks_z; + * }; + */ + struct pipe_resource *indirect; + unsigned indirect_offset; /**< must be 4 byte aligned */ +}; /** * Structure used as a header for serialized LLVM programs. diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index a226ec1a752..8396be91553 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -55,6 +55,7 @@ kernel::launch(command_queue &q, const auto reduced_grid_size = map(divides(), grid_size, block_size); void *st = exec.bind(&q, grid_offset); + struct pipe_grid_info info; // The handles are created during exec_context::bind(), so we need make // sure to call exec_context::bind() before retrieving them. @@ -74,11 +75,13 @@ kernel::launch(command_queue &q, q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), exec.g_buffers.data(), g_handles.data()); - q.pipe->launch_grid(q.pipe, - pad_vector(q, block_size, 1).data(), - pad_vector(q, reduced_grid_size, 1).data(), - find(name_equals(_name), m.syms).offset, - exec.input.data()); + // Fill information for the launch_grid() call. 
+ copy(pad_vector(q, block_size, 1), info.block); + copy(pad_vector(q, reduced_grid_size, 1), info.grid); + info.pc = find(name_equals(_name), m.syms).offset; + info.input = exec.input.data(); + + q.pipe->launch_grid(q.pipe, &info); q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources index 8d178d4b18f..05eb1655376 100644 --- a/src/gallium/state_trackers/nine/Makefile.sources +++ b/src/gallium/state_trackers/nine/Makefile.sources @@ -32,6 +32,7 @@ C_SOURCES := \ nineexoverlayextension.h \ nine_ff.c \ nine_ff.h \ + nine_flags.h \ nine_helpers.c \ nine_helpers.h \ nine_lock.c \ diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index 8428b1bd7eb..48e1e44c1bf 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -146,7 +146,7 @@ NineAdapter9_GetScreen( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_GetAdapterIdentifier( struct NineAdapter9 *This, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier ) @@ -182,7 +182,7 @@ backbuffer_format( D3DFORMAT dfmt, return FALSE; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceType( struct NineAdapter9 *This, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, @@ -207,11 +207,11 @@ NineAdapter9_CheckDeviceType( struct NineAdapter9 *This, dfmt = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D, 1, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); bfmt = d3d9_to_pipe_format_checked(screen, BackBufferFormat, PIPE_TEXTURE_2D, 1, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); if (dfmt == PIPE_FORMAT_NONE || bfmt == PIPE_FORMAT_NONE) { DBG("Unsupported Adapter/BackBufferFormat.\n"); 
return D3DERR_NOTAVAILABLE; @@ -241,7 +241,7 @@ display_format( D3DFORMAT fmt, return FALSE; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, @@ -270,7 +270,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, return hr; pf = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D, 0, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); if (pf == PIPE_FORMAT_NONE) { DBG("AdapterFormat %s not available.\n", d3dformat_to_string(AdapterFormat)); @@ -332,14 +332,16 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, srgb = (Usage & (D3DUSAGE_QUERY_SRGBREAD | D3DUSAGE_QUERY_SRGBWRITE)) != 0; - pf = d3d9_to_pipe_format_checked(screen, CheckFormat, target, 0, bind, srgb); + pf = d3d9_to_pipe_format_checked(screen, CheckFormat, target, + 0, bind, srgb, FALSE); if (pf == PIPE_FORMAT_NONE) { DBG("NOT AVAILABLE\n"); return D3DERR_NOTAVAILABLE; } - /* we support ATI1 and ATI2 hack only for 2D textures */ - if (RType != D3DRTYPE_TEXTURE && (CheckFormat == D3DFMT_ATI1 || CheckFormat == D3DFMT_ATI2)) + /* we support ATI1 and ATI2 hack only for 2D and Cube textures */ + if (RType != D3DRTYPE_TEXTURE && RType != D3DRTYPE_CUBETEXTURE && + (CheckFormat == D3DFMT_ATI1 || CheckFormat == D3DFMT_ATI2)) return D3DERR_NOTAVAILABLE; /* if (Usage & D3DUSAGE_NONSECURE) { don't know the implications of this } */ /* if (Usage & D3DUSAGE_SOFTWAREPROCESSING) { we can always support this } */ @@ -349,7 +351,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, @@ -378,7 +380,7 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This, PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_RENDER_TARGET; pf = d3d9_to_pipe_format_checked(screen, SurfaceFormat, 
PIPE_TEXTURE_2D, - MultiSampleType, bind, FALSE); + MultiSampleType, bind, FALSE, FALSE); if (pf == PIPE_FORMAT_NONE) { DBG("%s with %u samples not available.\n", @@ -392,7 +394,7 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, @@ -417,16 +419,16 @@ NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This, dfmt = d3d9_to_pipe_format_checked(screen, AdapterFormat, PIPE_TEXTURE_2D, 0, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); bfmt = d3d9_to_pipe_format_checked(screen, RenderTargetFormat, PIPE_TEXTURE_2D, 0, - PIPE_BIND_RENDER_TARGET, FALSE); + PIPE_BIND_RENDER_TARGET, FALSE, FALSE); if (RenderTargetFormat == D3DFMT_NULL) bfmt = dfmt; zsfmt = d3d9_to_pipe_format_checked(screen, DepthStencilFormat, PIPE_TEXTURE_2D, 0, d3d9_get_pipe_depth_format_bindings(DepthStencilFormat), - FALSE); + FALSE, FALSE); if (dfmt == PIPE_FORMAT_NONE || bfmt == PIPE_FORMAT_NONE || zsfmt == PIPE_FORMAT_NONE) { @@ -436,7 +438,7 @@ NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, @@ -461,10 +463,10 @@ NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This, dfmt = d3d9_to_pipe_format_checked(screen, TargetFormat, PIPE_TEXTURE_2D, 1, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); bfmt = d3d9_to_pipe_format_checked(screen, SourceFormat, PIPE_TEXTURE_2D, 1, PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SHARED, FALSE); + PIPE_BIND_SHARED, FALSE, FALSE); if (dfmt == PIPE_FORMAT_NONE || bfmt == PIPE_FORMAT_NONE) { DBG("%s to %s not supported.\n", @@ -476,7 +478,7 @@ NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 
*This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps ) @@ -932,7 +934,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CreateDevice( struct NineAdapter9 *This, UINT RealAdapter, D3DDEVTYPE DeviceType, @@ -992,7 +994,7 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This, UINT RealAdapter, D3DDEVTYPE DeviceType, diff --git a/src/gallium/state_trackers/nine/adapter9.h b/src/gallium/state_trackers/nine/adapter9.h index 2129ec8edc0..aaf7435fcda 100644 --- a/src/gallium/state_trackers/nine/adapter9.h +++ b/src/gallium/state_trackers/nine/adapter9.h @@ -67,19 +67,19 @@ NineAdapter9_ctor( struct NineAdapter9 *This, void NineAdapter9_dtor( struct NineAdapter9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_GetAdapterIdentifier( struct NineAdapter9 *This, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceType( struct NineAdapter9 *This, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, @@ -87,7 +87,7 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, @@ -95,25 +95,25 @@ NineAdapter9_CheckDeviceMultiSampleType( struct NineAdapter9 *This, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDepthStencilMatch( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, 
D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CheckDeviceFormatConversion( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CreateDevice( struct NineAdapter9 *This, UINT RealAdapter, D3DDEVTYPE DeviceType, @@ -124,7 +124,7 @@ NineAdapter9_CreateDevice( struct NineAdapter9 *This, ID3DPresentGroup *pPresentationGroup, IDirect3DDevice9 **ppReturnedDeviceInterface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAdapter9_CreateDeviceEx( struct NineAdapter9 *This, UINT RealAdapter, D3DDEVTYPE DeviceType, diff --git a/src/gallium/state_trackers/nine/authenticatedchannel9.c b/src/gallium/state_trackers/nine/authenticatedchannel9.c index 44ad87c956f..45fca3a2975 100644 --- a/src/gallium/state_trackers/nine/authenticatedchannel9.c +++ b/src/gallium/state_trackers/nine/authenticatedchannel9.c @@ -24,14 +24,14 @@ #define DBG_CHANNEL DBG_AUTHENTICATEDCHANNEL -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This, UINT *pCertificateSize ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This, UINT CertifacteSize, BYTE *ppCertificate ) @@ -39,7 +39,7 @@ NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This, UINT DataSize, void *pData ) @@ -47,7 +47,7 @@ NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_Query( 
struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, @@ -57,7 +57,7 @@ NineAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, diff --git a/src/gallium/state_trackers/nine/authenticatedchannel9.h b/src/gallium/state_trackers/nine/authenticatedchannel9.h index 63cb2269db4..b18848235c8 100644 --- a/src/gallium/state_trackers/nine/authenticatedchannel9.h +++ b/src/gallium/state_trackers/nine/authenticatedchannel9.h @@ -35,28 +35,28 @@ NineAuthenticatedChannel9( void *data ) return (struct NineAuthenticatedChannel9 *)data; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This, UINT *pCertificateSize ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This, UINT CertifacteSize, BYTE *ppCertificate ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This, UINT DataSize, void *pData ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, UINT OutputSize, void *pOutput ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 7a0959a8f3e..c4eb813e9f8 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -109,7 +109,7 @@ NineBaseTexture9_dtor( struct NineBaseTexture9 *This ) NineResource9_dtor(&This->base); } -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This, DWORD 
LODNew ) { @@ -130,7 +130,7 @@ NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This, return old; } -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); @@ -138,7 +138,7 @@ NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This ) return This->managed.lod; } -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); @@ -148,7 +148,7 @@ NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This ) return This->base.info.last_level + 1; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, D3DTEXTUREFILTERTYPE FilterType ) { @@ -165,7 +165,7 @@ NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, return D3D_OK; } -D3DTEXTUREFILTERTYPE WINAPI +D3DTEXTUREFILTERTYPE NINE_WINAPI NineBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); @@ -383,7 +383,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) return D3D_OK; } -void WINAPI +void NINE_WINAPI NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) { struct pipe_resource *resource; @@ -580,7 +580,7 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This, return This->view ? 
D3D_OK : D3DERR_DRIVERINTERNALERROR; } -void WINAPI +void NINE_WINAPI NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h index b19a62195fc..980c4a556b2 100644 --- a/src/gallium/state_trackers/nine/basetexture9.h +++ b/src/gallium/state_trackers/nine/basetexture9.h @@ -72,27 +72,27 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, void NineBaseTexture9_dtor( struct NineBaseTexture9 *This ); -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This, DWORD LODNew ); -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This ); -DWORD WINAPI +DWORD NINE_WINAPI NineBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, D3DTEXTUREFILTERTYPE FilterType ); -D3DTEXTUREFILTERTYPE WINAPI +D3DTEXTUREFILTERTYPE NINE_WINAPI NineBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This ); -void WINAPI +void NINE_WINAPI NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ); -void WINAPI +void NINE_WINAPI NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ); void diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c index b4b91ec2a02..e066fc59f45 100644 --- a/src/gallium/state_trackers/nine/buffer9.c +++ b/src/gallium/state_trackers/nine/buffer9.c @@ -93,7 +93,26 @@ NineBuffer9_ctor( struct NineBuffer9 *This, hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, Type, Pool, Usage); - return hr; + + if (FAILED(hr)) + return hr; + + if (Pool == D3DPOOL_MANAGED) { + This->managed.data = align_malloc( + nine_format_get_level_alloc_size(This->base.info.format, + Size, 1, 0), 32); + if (!This->managed.data) + return E_OUTOFMEMORY; + memset(This->managed.data, 0, Size); + This->managed.dirty = 
TRUE; + u_box_1d(0, Size, &This->managed.dirty_box); + list_inithead(&This->managed.list); + list_inithead(&This->managed.list2); + list_add(&This->managed.list, &pParams->device->update_buffers); + list_add(&This->managed.list2, &pParams->device->managed_buffers); + } + + return D3D_OK; } void @@ -106,6 +125,15 @@ NineBuffer9_dtor( struct NineBuffer9 *This ) FREE(This->maps); } + if (This->base.pool == D3DPOOL_MANAGED) { + if (This->managed.data) + align_free(This->managed.data); + if (This->managed.list.prev != NULL && This->managed.list.next != NULL) + list_del(&This->managed.list); + if (This->managed.list2.prev != NULL && This->managed.list2.next != NULL) + list_del(&This->managed.list2); + } + NineResource9_dtor(&This->base); } @@ -115,7 +143,7 @@ NineBuffer9_GetResource( struct NineBuffer9 *This ) return NineResource9_GetResource(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineBuffer9_Lock( struct NineBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, @@ -138,6 +166,28 @@ NineBuffer9_Lock( struct NineBuffer9 *This, D3DLOCK_READONLY | D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL); + if (SizeToLock == 0) { + SizeToLock = This->size - OffsetToLock; + user_warn(OffsetToLock != 0); + } + + u_box_1d(OffsetToLock, SizeToLock, &box); + + if (This->base.pool == D3DPOOL_MANAGED) { + if (!This->managed.dirty) { + assert(LIST_IS_EMPTY(&This->managed.list)); + list_add(&This->managed.list, &This->base.base.device->update_buffers); + This->managed.dirty = TRUE; + This->managed.dirty_box = box; + } else { + u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box); + } + *ppbData = (char *)This->managed.data + OffsetToLock; + DBG("returning pointer %p\n", *ppbData); + This->nmaps++; + return D3D_OK; + } + if (This->nmaps == This->maxmaps) { struct pipe_transfer **newmaps = REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps, @@ -149,13 +199,6 @@ NineBuffer9_Lock( struct NineBuffer9 *This, This->maps = newmaps; } - if (SizeToLock == 0) { - 
SizeToLock = This->size - OffsetToLock; - user_warn(OffsetToLock != 0); - } - - u_box_1d(OffsetToLock, SizeToLock, &box); - data = This->pipe->transfer_map(This->pipe, This->base.resource, 0, usage, &box, &This->maps[This->nmaps]); @@ -178,12 +221,28 @@ NineBuffer9_Lock( struct NineBuffer9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineBuffer9_Unlock( struct NineBuffer9 *This ) { DBG("This=%p\n", This); user_assert(This->nmaps > 0, D3DERR_INVALIDCALL); - This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); + if (This->base.pool != D3DPOOL_MANAGED) + This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); + else + This->nmaps--; return D3D_OK; } + +void +NineBuffer9_SetDirty( struct NineBuffer9 *This ) +{ + assert(This->base.pool == D3DPOOL_MANAGED); + + if (!This->managed.dirty) { + assert(LIST_IS_EMPTY(&This->managed.list)); + list_add(&This->managed.list, &This->base.base.device->update_buffers); + This->managed.dirty = TRUE; + } + u_box_1d(0, This->size, &This->managed.dirty_box); +} diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h index 1afd9a996ea..8bdb4326a4c 100644 --- a/src/gallium/state_trackers/nine/buffer9.h +++ b/src/gallium/state_trackers/nine/buffer9.h @@ -25,6 +25,9 @@ #define _NINE_BUFFER9_H_ #include "resource9.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/list.h" struct pipe_screen; struct pipe_context; @@ -39,6 +42,15 @@ struct NineBuffer9 struct pipe_transfer **maps; int nmaps, maxmaps; UINT size; + + /* Specific to managed buffers */ + struct { + void *data; + boolean dirty; + struct pipe_box dirty_box; + struct list_head list; /* for update_buffers */ + struct list_head list2; /* for managed_buffers */ + } managed; }; static inline struct NineBuffer9 * NineBuffer9( void *data ) @@ -60,14 +72,30 @@ NineBuffer9_dtor( struct NineBuffer9 *This ); struct pipe_resource * NineBuffer9_GetResource( struct NineBuffer9 *This ); 
-HRESULT WINAPI +HRESULT NINE_WINAPI NineBuffer9_Lock( struct NineBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineBuffer9_Unlock( struct NineBuffer9 *This ); +static inline void +NineBuffer9_Upload( struct NineBuffer9 *This ) +{ + struct pipe_context *pipe = This->pipe; + + assert(This->base.pool == D3DPOOL_MANAGED && This->managed.dirty); + pipe->transfer_inline_write(pipe, This->base.resource, 0, 0, + &This->managed.dirty_box, + (char *)This->managed.data + This->managed.dirty_box.x, + This->size, This->size); + This->managed.dirty = FALSE; +} + +void +NineBuffer9_SetDirty( struct NineBuffer9 *This ); + #endif /* _NINE_BUFFER9_H_ */ diff --git a/src/gallium/state_trackers/nine/cryptosession9.c b/src/gallium/state_trackers/nine/cryptosession9.c index 2622f2b32e4..c656f72a215 100644 --- a/src/gallium/state_trackers/nine/cryptosession9.c +++ b/src/gallium/state_trackers/nine/cryptosession9.c @@ -24,14 +24,14 @@ #define DBG_CHANNEL DBG_CRYPTOSESSION -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This, UINT *pCertificateSize ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This, UINT CertifacteSize, BYTE *ppCertificate ) @@ -39,7 +39,7 @@ NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This, UINT DataSize, void *pData ) @@ -47,7 +47,7 @@ NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, @@ -57,7 +57,7 @@ NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This, 
STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, @@ -69,7 +69,7 @@ NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch ) @@ -77,7 +77,7 @@ NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This, void *pRandomNumber, UINT RandomNumberSize ) @@ -85,13 +85,13 @@ NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This, void *pReadbackKey, UINT KeySize ) diff --git a/src/gallium/state_trackers/nine/cryptosession9.h b/src/gallium/state_trackers/nine/cryptosession9.h index d1eab72eb37..9904455a000 100644 --- a/src/gallium/state_trackers/nine/cryptosession9.h +++ b/src/gallium/state_trackers/nine/cryptosession9.h @@ -35,28 +35,28 @@ NineCryptoSession9( void *data ) return (struct NineCryptoSession9 *)data; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This, UINT *pCertificateSize ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetCertificate( struct NineCryptoSession9 *This, UINT CertifacteSize, BYTE *ppCertificate ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This, UINT DataSize, void *pData ); -HRESULT WINAPI +HRESULT NINE_WINAPI 
NineCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT DstSurfaceSize, void *pIV ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, @@ -65,20 +65,20 @@ NineCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, void *pContentKey, void *pIV ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This, void *pRandomNumber, UINT RandomNumberSize ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This, void *pReadbackKey, UINT KeySize ); diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index 460cc853942..11000942d3a 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -53,21 +53,22 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, This, pParams, EdgeLength, Levels, Usage, Format, Pool, pSharedHandle); - user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) || - (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL); + user_assert(EdgeLength, D3DERR_INVALIDCALL); + /* user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); */ user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */ + user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) || + (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL); + if (Usage & D3DUSAGE_AUTOGENMIPMAP) Levels = 0; pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_CUBE, 0, - 
PIPE_BIND_SAMPLER_VIEW, FALSE); - if (pf == PIPE_FORMAT_NONE) - return D3DERR_INVALIDCALL; + PIPE_BIND_SAMPLER_VIEW, FALSE, + Pool == D3DPOOL_SCRATCH); - /* We support ATI1 and ATI2 hacks only for 2D textures */ - if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) + if (pf == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; if (compressed_format(Format)) { @@ -186,7 +187,7 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This ) NineBaseTexture9_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ) @@ -202,7 +203,7 @@ NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, @@ -224,7 +225,7 @@ NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_LockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, @@ -245,7 +246,7 @@ NineCubeTexture9_LockRect( struct NineCubeTexture9 *This, return NineSurface9_LockRect(This->surfaces[s], pLockedRect, pRect, Flags); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level ) @@ -260,7 +261,7 @@ NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This, return NineSurface9_UnlockRect(This->surfaces[s]); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect ) diff --git a/src/gallium/state_trackers/nine/cubetexture9.h b/src/gallium/state_trackers/nine/cubetexture9.h index 999715c0a74..129789d0d69 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.h +++ b/src/gallium/state_trackers/nine/cubetexture9.h @@ -48,18 +48,18 @@ NineCubeTexture9_new( struct NineDevice9 *pDevice, struct 
NineCubeTexture9 **ppOut, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, IDirect3DSurface9 **ppCubeMapSurface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_LockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, @@ -67,12 +67,12 @@ NineCubeTexture9_LockRect( struct NineCubeTexture9 *This, const RECT *pRect, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_UnlockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect ); diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 475ef96788e..e4403f0db03 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -147,7 +147,9 @@ NineDevice9_ctor( struct NineDevice9 *This, if (FAILED(hr)) { return hr; } + list_inithead(&This->update_buffers); list_inithead(&This->update_textures); + list_inithead(&This->managed_buffers); list_inithead(&This->managed_textures); This->screen = pScreen; @@ -540,7 +542,7 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This ) } } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_TestCooperativeLevel( struct NineDevice9 *This ) { if (NineSwapChain9_GetOccluded(This->swapchains[0])) { @@ -553,26 +555,35 @@ NineDevice9_TestCooperativeLevel( struct NineDevice9 *This ) return D3D_OK; } -UINT WINAPI +UINT NINE_WINAPI NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) { return This->available_texture_mem; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EvictManagedResources( struct NineDevice9 *This ) { struct 
NineBaseTexture9 *tex; + struct NineBuffer9 *buf; DBG("This=%p\n", This); LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) { NineBaseTexture9_UnLoad(tex); } + /* Vertex/index buffers don't take a lot of space and aren't accounted + * for d3d memory usage. Instead of actually freeing from memory, + * just mark the buffer dirty to trigger a re-upload later. We + * could just ignore, but some bad behaving apps could rely on it (if + * they write outside the locked regions typically). */ + LIST_FOR_EACH_ENTRY(buf, &This->managed_buffers, managed.list2) { + NineBuffer9_SetDirty(buf); + } return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDirect3D( struct NineDevice9 *This, IDirect3D9 **ppD3D9 ) { @@ -582,7 +593,7 @@ NineDevice9_GetDirect3D( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDeviceCaps( struct NineDevice9 *This, D3DCAPS9 *pCaps ) { @@ -591,7 +602,7 @@ NineDevice9_GetDeviceCaps( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDisplayMode( struct NineDevice9 *This, UINT iSwapChain, D3DDISPLAYMODE *pMode ) @@ -603,7 +614,7 @@ NineDevice9_GetDisplayMode( struct NineDevice9 *This, return NineSwapChain9_GetDisplayMode(This->swapchains[iSwapChain], pMode); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetCreationParameters( struct NineDevice9 *This, D3DDEVICE_CREATION_PARAMETERS *pParameters ) { @@ -612,7 +623,7 @@ NineDevice9_GetCreationParameters( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetCursorProperties( struct NineDevice9 *This, UINT XHotSpot, UINT YHotSpot, @@ -688,7 +699,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, return D3D_OK; } -void WINAPI +void NINE_WINAPI NineDevice9_SetCursorPosition( struct NineDevice9 *This, int X, int Y, @@ -705,7 +716,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This, This->cursor.software = 
ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK; } -BOOL WINAPI +BOOL NINE_WINAPI NineDevice9_ShowCursor( struct NineDevice9 *This, BOOL bShow ) { @@ -720,7 +731,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This, return old; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain ) @@ -755,7 +766,7 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetSwapChain( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain ) @@ -771,13 +782,13 @@ NineDevice9_GetSwapChain( struct NineDevice9 *This, return D3D_OK; } -UINT WINAPI +UINT NINE_WINAPI NineDevice9_GetNumberOfSwapChains( struct NineDevice9 *This ) { return This->nswapchains; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Reset( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ) { @@ -810,7 +821,7 @@ NineDevice9_Reset( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Present( struct NineDevice9 *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -833,7 +844,7 @@ NineDevice9_Present( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetBackBuffer( struct NineDevice9 *This, UINT iSwapChain, UINT iBackBuffer, @@ -849,7 +860,7 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This, iBackBuffer, Type, ppBackBuffer); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRasterStatus( struct NineDevice9 *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus ) @@ -861,14 +872,14 @@ NineDevice9_GetRasterStatus( struct NineDevice9 *This, pRasterStatus); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetDialogBoxMode( struct NineDevice9 *This, BOOL bEnableDialogs ) { STUB(D3DERR_INVALIDCALL); } -void WINAPI +void NINE_WINAPI NineDevice9_SetGammaRamp( 
struct NineDevice9 *This, UINT iSwapChain, DWORD Flags, @@ -887,7 +898,7 @@ NineDevice9_SetGammaRamp( struct NineDevice9 *This, } } -void WINAPI +void NINE_WINAPI NineDevice9_GetGammaRamp( struct NineDevice9 *This, UINT iSwapChain, D3DGAMMARAMP *pRamp ) @@ -901,7 +912,7 @@ NineDevice9_GetGammaRamp( struct NineDevice9 *This, *pRamp = This->swapchains[iSwapChain]->gamma; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -925,15 +936,6 @@ NineDevice9_CreateTexture( struct NineDevice9 *This, D3DUSAGE_SOFTWAREPROCESSING | D3DUSAGE_TEXTAPI; *ppTexture = NULL; - user_assert(Width && Height, D3DERR_INVALIDCALL); - user_assert(!pSharedHandle || This->ex, D3DERR_INVALIDCALL); - /* When is used shared handle, Pool must be - * SYSTEMMEM with Levels 1 or DEFAULT with any Levels */ - user_assert(!pSharedHandle || Pool != D3DPOOL_SYSTEMMEM || Levels == 1, - D3DERR_INVALIDCALL); - user_assert(!pSharedHandle || Pool == D3DPOOL_SYSTEMMEM || Pool == D3DPOOL_DEFAULT, - D3DERR_INVALIDCALL); - user_assert((Usage != D3DUSAGE_AUTOGENMIPMAP || Levels <= 1), D3DERR_INVALIDCALL); hr = NineTexture9_new(This, Width, Height, Levels, Usage, Format, Pool, &tex, pSharedHandle); @@ -943,7 +945,7 @@ NineDevice9_CreateTexture( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVolumeTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -967,8 +969,6 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This, D3DUSAGE_SOFTWAREPROCESSING; *ppVolumeTexture = NULL; - user_assert(Width && Height && Depth, D3DERR_INVALIDCALL); - user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); hr = NineVolumeTexture9_new(This, Width, Height, Depth, Levels, Usage, Format, Pool, &tex, pSharedHandle); @@ -978,7 +978,7 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateCubeTexture( struct NineDevice9 
*This, UINT EdgeLength, UINT Levels, @@ -1001,8 +1001,6 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This, D3DUSAGE_SOFTWAREPROCESSING; *ppCubeTexture = NULL; - user_assert(EdgeLength, D3DERR_INVALIDCALL); - user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); hr = NineCubeTexture9_new(This, EdgeLength, Levels, Usage, Format, Pool, &tex, pSharedHandle); @@ -1012,7 +1010,7 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -1050,7 +1048,7 @@ NineDevice9_CreateVertexBuffer( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateIndexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -1137,7 +1135,10 @@ create_zs_or_rt_surface(struct NineDevice9 *This, } templ.format = d3d9_to_pipe_format_checked(screen, Format, templ.target, templ.nr_samples, templ.bind, - FALSE); + FALSE, Pool == D3DPOOL_SCRATCH); + + if (templ.format == PIPE_FORMAT_NONE && Format != D3DFMT_NULL) + return D3DERR_INVALIDCALL; desc.Format = Format; desc.Type = D3DRTYPE_SURFACE; @@ -1178,7 +1179,7 @@ create_zs_or_rt_surface(struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateRenderTarget( struct NineDevice9 *This, UINT Width, UINT Height, @@ -1196,7 +1197,7 @@ NineDevice9_CreateRenderTarget( struct NineDevice9 *This, Lockable, ppSurface, pSharedHandle); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -1216,7 +1217,7 @@ NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, Discard, ppSurface, pSharedHandle); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_UpdateSurface( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, @@ -1309,7 +1310,7 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This, return 
D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_UpdateTexture( struct NineDevice9 *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture ) @@ -1438,7 +1439,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderTargetData( struct NineDevice9 *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface ) @@ -1463,7 +1464,7 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetFrontBufferData( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface ) @@ -1478,7 +1479,7 @@ NineDevice9_GetFrontBufferData( struct NineDevice9 *This, pDestSurface); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_StretchRect( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, @@ -1682,7 +1683,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ColorFill( struct NineDevice9 *This, IDirect3DSurface9 *pSurface, const RECT *pRect, @@ -1749,7 +1750,7 @@ NineDevice9_ColorFill( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -1782,7 +1783,7 @@ NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget ) @@ -1821,7 +1822,7 @@ NineDevice9_SetRenderTarget( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget ) @@ -1839,7 +1840,7 @@ NineDevice9_GetRenderTarget( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT 
NINE_WINAPI NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 *pNewZStencil ) { @@ -1852,7 +1853,7 @@ NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 **ppZStencilSurface ) { @@ -1866,7 +1867,7 @@ NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_BeginScene( struct NineDevice9 *This ) { DBG("This=%p\n", This); @@ -1876,7 +1877,7 @@ NineDevice9_BeginScene( struct NineDevice9 *This ) return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EndScene( struct NineDevice9 *This ) { DBG("This=%p\n", This); @@ -1885,7 +1886,7 @@ NineDevice9_EndScene( struct NineDevice9 *This ) return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Clear( struct NineDevice9 *This, DWORD Count, const D3DRECT *pRects, @@ -2047,7 +2048,7 @@ NineDevice9_Clear( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ) @@ -2066,7 +2067,7 @@ NineDevice9_SetTransform( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix ) @@ -2077,7 +2078,7 @@ NineDevice9_GetTransform( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_MultiplyTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ) @@ -2094,7 +2095,7 @@ NineDevice9_MultiplyTransform( struct NineDevice9 *This, return NineDevice9_SetTransform(This, State, &T); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetViewport( struct NineDevice9 *This, const D3DVIEWPORT9 *pViewport ) { @@ -2110,7 +2111,7 @@ NineDevice9_SetViewport( struct NineDevice9 
*This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetViewport( struct NineDevice9 *This, D3DVIEWPORT9 *pViewport ) { @@ -2118,7 +2119,7 @@ NineDevice9_GetViewport( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetMaterial( struct NineDevice9 *This, const D3DMATERIAL9 *pMaterial ) { @@ -2136,7 +2137,7 @@ NineDevice9_SetMaterial( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetMaterial( struct NineDevice9 *This, D3DMATERIAL9 *pMaterial ) { @@ -2145,7 +2146,7 @@ NineDevice9_GetMaterial( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetLight( struct NineDevice9 *This, DWORD Index, const D3DLIGHT9 *pLight ) @@ -2194,7 +2195,7 @@ NineDevice9_SetLight( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetLight( struct NineDevice9 *This, DWORD Index, D3DLIGHT9 *pLight ) @@ -2211,7 +2212,7 @@ NineDevice9_GetLight( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_LightEnable( struct NineDevice9 *This, DWORD Index, BOOL Enable ) @@ -2261,7 +2262,7 @@ NineDevice9_LightEnable( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetLightEnable( struct NineDevice9 *This, DWORD Index, BOOL *pEnable ) @@ -2282,7 +2283,7 @@ NineDevice9_GetLightEnable( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetClipPlane( struct NineDevice9 *This, DWORD Index, const float *pPlane ) @@ -2303,7 +2304,7 @@ NineDevice9_SetClipPlane( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetClipPlane( struct NineDevice9 *This, DWORD Index, float *pPlane ) @@ -2374,7 +2375,7 @@ NineDevice9_ResolveZ( struct NineDevice9 *This ) #define ALPHA_TO_COVERAGE_ENABLE MAKEFOURCC('A', '2', 'M', '1') #define ALPHA_TO_COVERAGE_DISABLE MAKEFOURCC('A', 
'2', 'M', '0') -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD Value ) @@ -2420,7 +2421,7 @@ NineDevice9_SetRenderState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue ) @@ -2431,7 +2432,7 @@ NineDevice9_GetRenderState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateStateBlock( struct NineDevice9 *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB ) @@ -2531,7 +2532,7 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_BeginStateBlock( struct NineDevice9 *This ) { HRESULT hr; @@ -2551,7 +2552,7 @@ NineDevice9_BeginStateBlock( struct NineDevice9 *This ) return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EndStateBlock( struct NineDevice9 *This, IDirect3DStateBlock9 **ppSB ) { @@ -2570,21 +2571,21 @@ NineDevice9_EndStateBlock( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetClipStatus( struct NineDevice9 *This, const D3DCLIPSTATUS9 *pClipStatus ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetClipStatus( struct NineDevice9 *This, D3DCLIPSTATUS9 *pClipStatus ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture ) @@ -2605,7 +2606,7 @@ NineDevice9_GetTexture( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture ) @@ -2650,7 +2651,7 @@ NineDevice9_SetTexture( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetTextureStageState( struct NineDevice9 *This, DWORD Stage, 
D3DTEXTURESTAGESTATETYPE Type, @@ -2666,7 +2667,7 @@ NineDevice9_GetTextureStageState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, @@ -2719,7 +2720,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, @@ -2737,7 +2738,7 @@ NineDevice9_GetSamplerState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, @@ -2765,7 +2766,7 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ValidateDevice( struct NineDevice9 *This, DWORD *pNumPasses ) { @@ -2805,7 +2806,7 @@ NineDevice9_ValidateDevice( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, const PALETTEENTRY *pEntries ) @@ -2813,7 +2814,7 @@ NineDevice9_SetPaletteEntries( struct NineDevice9 *This, STUB(D3D_OK); /* like wine */ } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, PALETTEENTRY *pEntries ) @@ -2821,21 +2822,21 @@ NineDevice9_GetPaletteEntries( struct NineDevice9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetCurrentTexturePalette( struct NineDevice9 *This, UINT PaletteNumber ) { STUB(D3D_OK); /* like wine */ } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetCurrentTexturePalette( struct NineDevice9 *This, UINT *PaletteNumber ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetScissorRect( struct NineDevice9 *This, const RECT *pRect ) { @@ -2854,7 +2855,7 @@ 
NineDevice9_SetScissorRect( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetScissorRect( struct NineDevice9 *This, RECT *pRect ) { @@ -2866,27 +2867,27 @@ NineDevice9_GetScissorRect( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This, BOOL bSoftware ) { STUB(D3DERR_INVALIDCALL); } -BOOL WINAPI +BOOL NINE_WINAPI NineDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This ) { return !!(This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetNPatchMode( struct NineDevice9 *This, float nSegments ) { STUB(D3DERR_INVALIDCALL); } -float WINAPI +float NINE_WINAPI NineDevice9_GetNPatchMode( struct NineDevice9 *This ) { STUB(0); @@ -2908,7 +2909,7 @@ init_draw_info(struct pipe_draw_info *info, info->indirect = NULL; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, @@ -2933,7 +2934,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, @@ -2967,7 +2968,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, @@ -3023,7 +3024,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, @@ -3115,7 +3116,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, /* TODO: Write to pDestBuffer directly if vertex declaration contains * only f32 formats. 
*/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ProcessVertices( struct NineDevice9 *This, UINT SrcStartIndex, UINT DestIndex, @@ -3208,7 +3209,7 @@ out: return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl ) @@ -3225,7 +3226,7 @@ NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pDecl ) { @@ -3250,7 +3251,7 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 **ppDecl ) { @@ -3262,7 +3263,7 @@ NineDevice9_GetVertexDeclaration( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetFVF( struct NineDevice9 *This, DWORD FVF ) { @@ -3286,7 +3287,7 @@ NineDevice9_SetFVF( struct NineDevice9 *This, This, (IDirect3DVertexDeclaration9 *)vdecl); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetFVF( struct NineDevice9 *This, DWORD *pFVF ) { @@ -3294,7 +3295,7 @@ NineDevice9_GetFVF( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader ) @@ -3311,7 +3312,7 @@ NineDevice9_CreateVertexShader( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 *pShader ) { @@ -3336,7 +3337,7 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 **ppShader ) { @@ -3345,7 +3346,7 @@ NineDevice9_GetVertexShader( struct NineDevice9 *This, 
return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, @@ -3382,7 +3383,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, float *pConstantData, @@ -3401,7 +3402,7 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, @@ -3441,7 +3442,7 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, @@ -3470,7 +3471,7 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, @@ -3506,7 +3507,7 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, @@ -3525,7 +3526,7 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, @@ -3562,7 +3563,7 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, @@ -3582,7 +3583,7 @@ NineDevice9_GetStreamSource( struct NineDevice9 *This, return D3D_OK; } -HRESULT 
WINAPI +HRESULT NINE_WINAPI NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This, UINT StreamNumber, UINT Setting ) @@ -3616,7 +3617,7 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This, UINT StreamNumber, UINT *pSetting ) @@ -3626,7 +3627,7 @@ NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 *pIndexData ) { @@ -3647,7 +3648,7 @@ NineDevice9_SetIndices( struct NineDevice9 *This, /* XXX: wine/d3d9 doesn't have pBaseVertexIndex, and it doesn't make sense * here because it's an argument passed to the Draw calls. */ -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 **ppIndexData /*, UINT *pBaseVertexIndex */ ) @@ -3657,7 +3658,7 @@ NineDevice9_GetIndices( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreatePixelShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader ) @@ -3674,7 +3675,7 @@ NineDevice9_CreatePixelShader( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 *pShader ) { @@ -3704,7 +3705,7 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 **ppShader ) { @@ -3713,7 +3714,7 @@ NineDevice9_GetPixelShader( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, @@ -3750,7 +3751,7 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT 
NINE_WINAPI NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This, UINT StartRegister, float *pConstantData, @@ -3769,7 +3770,7 @@ NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, @@ -3808,7 +3809,7 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, @@ -3837,7 +3838,7 @@ NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, @@ -3873,7 +3874,7 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, @@ -3892,7 +3893,7 @@ NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawRectPatch( struct NineDevice9 *This, UINT Handle, const float *pNumSegs, @@ -3901,7 +3902,7 @@ NineDevice9_DrawRectPatch( struct NineDevice9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawTriPatch( struct NineDevice9 *This, UINT Handle, const float *pNumSegs, @@ -3910,14 +3911,14 @@ NineDevice9_DrawTriPatch( struct NineDevice9 *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DeletePatch( struct NineDevice9 *This, UINT Handle ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateQuery( struct NineDevice9 *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery ) diff --git a/src/gallium/state_trackers/nine/device9.h 
b/src/gallium/state_trackers/nine/device9.h index 34edf0cfa48..73a43cf08ff 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -68,7 +68,9 @@ struct NineDevice9 struct nine_state *update; /* state to update (&state / &record->state) */ struct nine_state state; /* device state */ + struct list_head update_buffers; struct list_head update_textures; + struct list_head managed_buffers; struct list_head managed_textures; boolean is_recording; @@ -196,100 +198,100 @@ NineDevice9_GetCaps( struct NineDevice9 *This ); /*** Direct3D public ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_TestCooperativeLevel( struct NineDevice9 *This ); -UINT WINAPI +UINT NINE_WINAPI NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EvictManagedResources( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDirect3D( struct NineDevice9 *This, IDirect3D9 **ppD3D9 ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDeviceCaps( struct NineDevice9 *This, D3DCAPS9 *pCaps ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDisplayMode( struct NineDevice9 *This, UINT iSwapChain, D3DDISPLAYMODE *pMode ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetCreationParameters( struct NineDevice9 *This, D3DDEVICE_CREATION_PARAMETERS *pParameters ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetCursorProperties( struct NineDevice9 *This, UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap ); -void WINAPI +void NINE_WINAPI NineDevice9_SetCursorPosition( struct NineDevice9 *This, int X, int Y, DWORD Flags ); -BOOL WINAPI +BOOL NINE_WINAPI NineDevice9_ShowCursor( struct NineDevice9 *This, BOOL bShow ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain ); -HRESULT WINAPI +HRESULT NINE_WINAPI 
NineDevice9_GetSwapChain( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain ); -UINT WINAPI +UINT NINE_WINAPI NineDevice9_GetNumberOfSwapChains( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Reset( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Present( struct NineDevice9 *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetBackBuffer( struct NineDevice9 *This, UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRasterStatus( struct NineDevice9 *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetDialogBoxMode( struct NineDevice9 *This, BOOL bEnableDialogs ); -void WINAPI +void NINE_WINAPI NineDevice9_SetGammaRamp( struct NineDevice9 *This, UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp ); -void WINAPI +void NINE_WINAPI NineDevice9_GetGammaRamp( struct NineDevice9 *This, UINT iSwapChain, D3DGAMMARAMP *pRamp ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -300,7 +302,7 @@ NineDevice9_CreateTexture( struct NineDevice9 *This, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVolumeTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -312,7 +314,7 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateCubeTexture( struct NineDevice9 *This, UINT EdgeLength, UINT Levels, @@ -322,7 +324,7 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE 
*pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -331,7 +333,7 @@ NineDevice9_CreateVertexBuffer( struct NineDevice9 *This, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateIndexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -340,7 +342,7 @@ NineDevice9_CreateIndexBuffer( struct NineDevice9 *This, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateRenderTarget( struct NineDevice9 *This, UINT Width, UINT Height, @@ -351,7 +353,7 @@ NineDevice9_CreateRenderTarget( struct NineDevice9 *This, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -362,29 +364,29 @@ NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_UpdateSurface( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_UpdateTexture( struct NineDevice9 *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderTargetData( struct NineDevice9 *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetFrontBufferData( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_StretchRect( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, @@ -392,13 +394,13 @@ NineDevice9_StretchRect( struct NineDevice9 *This, const 
RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ColorFill( struct NineDevice9 *This, IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -407,31 +409,31 @@ NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 *pNewZStencil ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 **ppZStencilSurface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_BeginScene( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EndScene( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_Clear( struct NineDevice9 *This, DWORD Count, const D3DRECT *pRects, @@ -440,182 +442,182 @@ NineDevice9_Clear( struct NineDevice9 *This, float Z, DWORD Stencil ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_MultiplyTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetViewport( struct NineDevice9 *This, const D3DVIEWPORT9 *pViewport 
); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetViewport( struct NineDevice9 *This, D3DVIEWPORT9 *pViewport ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetMaterial( struct NineDevice9 *This, const D3DMATERIAL9 *pMaterial ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetMaterial( struct NineDevice9 *This, D3DMATERIAL9 *pMaterial ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetLight( struct NineDevice9 *This, DWORD Index, const D3DLIGHT9 *pLight ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetLight( struct NineDevice9 *This, DWORD Index, D3DLIGHT9 *pLight ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_LightEnable( struct NineDevice9 *This, DWORD Index, BOOL Enable ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetLightEnable( struct NineDevice9 *This, DWORD Index, BOOL *pEnable ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetClipPlane( struct NineDevice9 *This, DWORD Index, const float *pPlane ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetClipPlane( struct NineDevice9 *This, DWORD Index, float *pPlane ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD Value ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateStateBlock( struct NineDevice9 *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_BeginStateBlock( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_EndStateBlock( struct NineDevice9 *This, IDirect3DStateBlock9 **ppSB ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetClipStatus( struct NineDevice9 *This, const D3DCLIPSTATUS9 *pClipStatus ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetClipStatus( struct NineDevice9 *This, D3DCLIPSTATUS9 *pClipStatus ); -HRESULT WINAPI +HRESULT NINE_WINAPI 
NineDevice9_GetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetTextureStageState( struct NineDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ValidateDevice( struct NineDevice9 *This, DWORD *pNumPasses ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, const PALETTEENTRY *pEntries ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, PALETTEENTRY *pEntries ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetCurrentTexturePalette( struct NineDevice9 *This, UINT PaletteNumber ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetCurrentTexturePalette( struct NineDevice9 *This, UINT *PaletteNumber ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetScissorRect( struct NineDevice9 *This, const RECT *pRect ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetScissorRect( struct NineDevice9 *This, RECT *pRect ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This, BOOL bSoftware ); -BOOL WINAPI +BOOL NINE_WINAPI NineDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetNPatchMode( 
struct NineDevice9 *This, float nSegments ); -float WINAPI +float NINE_WINAPI NineDevice9_GetNPatchMode( struct NineDevice9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, @@ -624,14 +626,14 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, UINT startIndex, UINT primCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, @@ -642,7 +644,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_ProcessVertices( struct NineDevice9 *This, UINT SrcStartIndex, UINT DestIndex, @@ -651,175 +653,175 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexDeclaration( struct NineDevice9 *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pDecl ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 **ppDecl ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetFVF( struct NineDevice9 *This, DWORD FVF ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetFVF( struct NineDevice9 *This, DWORD *pFVF ); -HRESULT 
WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateVertexShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 *pShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 **ppShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This, UINT StreamNumber, UINT Setting ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetStreamSourceFreq( struct NineDevice9 *This, 
UINT StreamNumber, UINT *pSetting ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 *pIndexData ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 **ppIndexData /*, UINT *pBaseVertexIndex */ ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreatePixelShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 *pShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 **ppShader ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawRectPatch( struct NineDevice9 *This, UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DrawTriPatch( struct NineDevice9 *This, UINT Handle, const float 
*pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_DeletePatch( struct NineDevice9 *This, UINT Handle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9_CreateQuery( struct NineDevice9 *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery ); diff --git a/src/gallium/state_trackers/nine/device9ex.c b/src/gallium/state_trackers/nine/device9ex.c index 11244b1bedf..fd4272468cb 100644 --- a/src/gallium/state_trackers/nine/device9ex.c +++ b/src/gallium/state_trackers/nine/device9ex.c @@ -62,7 +62,7 @@ NineDevice9Ex_dtor( struct NineDevice9Ex *This ) NineDevice9_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, UINT width, UINT height, @@ -72,7 +72,7 @@ NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This, IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, @@ -86,7 +86,7 @@ NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_PresentEx( struct NineDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -111,28 +111,28 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This, INT *pPriority ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This, INT Priority ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This, UINT iSwapChain ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This, IDirect3DResource9 **pResourceArray, UINT32 NumResources ) @@ -140,21 +140,21 @@ 
NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This, UINT MaxLatency ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This, UINT *pMaxLatency ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, HWND hDestinationWindow ) { @@ -173,7 +173,7 @@ NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -188,7 +188,7 @@ NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -201,7 +201,7 @@ NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -216,7 +216,7 @@ NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_ResetEx( struct NineDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode ) @@ -241,7 +241,7 @@ NineDevice9Ex_ResetEx( struct NineDevice9Ex *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_Reset( struct NineDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters ) { @@ -267,7 +267,7 @@ NineDevice9Ex_Reset( struct NineDevice9Ex *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, @@ -284,7 
+284,7 @@ NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This ) { return D3D_OK; diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h index 1c7e57e0974..3306f238b59 100644 --- a/src/gallium/state_trackers/nine/device9ex.h +++ b/src/gallium/state_trackers/nine/device9ex.h @@ -47,14 +47,14 @@ NineDevice9Ex_new( struct pipe_screen *pScreen, struct NineDevice9Ex **ppOut, int minorVersionNum ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, UINT width, UINT height, float *rows, float *columns ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This, IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, @@ -65,7 +65,7 @@ NineDevice9Ex_ComposeRects( struct NineDevice9Ex *This, int Xoffset, int Yoffset ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_PresentEx( struct NineDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -73,43 +73,43 @@ NineDevice9Ex_PresentEx( struct NineDevice9Ex *This, const RGNDATA *pDirtyRegion, DWORD dwFlags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_Present( struct NineDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This, INT *pPriority ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This, INT Priority ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This, UINT iSwapChain ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This, IDirect3DResource9 **pResourceArray, UINT32 NumResources ); 
-HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This, UINT MaxLatency ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This, UINT *pMaxLatency ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, HWND hDestinationWindow ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -121,7 +121,7 @@ NineDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, HANDLE *pSharedHandle, DWORD Usage ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -131,7 +131,7 @@ NineDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, HANDLE *pSharedHandle, DWORD Usage ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -143,22 +143,22 @@ NineDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, HANDLE *pSharedHandle, DWORD Usage ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_ResetEx( struct NineDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_Reset( struct NineDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This ); #endif /* _NINE_DEVICE9EX_H_ */ diff --git a/src/gallium/state_trackers/nine/device9video.c b/src/gallium/state_trackers/nine/device9video.c index 65cc6a05c68..2e8e94a26e7 100644 --- a/src/gallium/state_trackers/nine/device9video.c +++ 
b/src/gallium/state_trackers/nine/device9video.c @@ -24,7 +24,7 @@ #define DBG_CHANNEL DBG_DEVICEVIDEO -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, @@ -33,7 +33,7 @@ NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This, D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, @@ -42,7 +42,7 @@ NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This, STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_CreateCryptoSession( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, diff --git a/src/gallium/state_trackers/nine/device9video.h b/src/gallium/state_trackers/nine/device9video.h index fc2faeb624a..d77fd5ec57a 100644 --- a/src/gallium/state_trackers/nine/device9video.h +++ b/src/gallium/state_trackers/nine/device9video.h @@ -35,19 +35,19 @@ NineDevice9Video( void *data ) return (struct NineDevice9Video *)data; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, D3DCONTENTPROTECTIONCAPS *pCaps ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This, D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, HANDLE *pChannelHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineDevice9Video_CreateCryptoSession( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c index 401fe75e95f..0a31d7e9716 100644 --- 
a/src/gallium/state_trackers/nine/indexbuffer9.c +++ b/src/gallium/state_trackers/nine/indexbuffer9.c @@ -85,7 +85,7 @@ NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This ) return NineBuffer9_GetResource(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, @@ -95,13 +95,13 @@ NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This, return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This ) { return NineBuffer9_Unlock(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This, D3DINDEXBUFFER_DESC *pDesc ) { diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h index f3274b71224..4802105c952 100644 --- a/src/gallium/state_trackers/nine/indexbuffer9.h +++ b/src/gallium/state_trackers/nine/indexbuffer9.h @@ -70,17 +70,17 @@ struct pipe_resource * NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This ); /*** Direct3D public ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This, D3DINDEXBUFFER_DESC *pDesc ); diff --git a/src/gallium/state_trackers/nine/iunknown.c b/src/gallium/state_trackers/nine/iunknown.c index aaf17bfeda7..a28c8b2de8b 100644 --- a/src/gallium/state_trackers/nine/iunknown.c +++ b/src/gallium/state_trackers/nine/iunknown.c @@ -51,7 +51,7 @@ NineUnknown_dtor( struct NineUnknown *This ) FREE(This); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineUnknown_QueryInterface( struct NineUnknown *This, REFIID riid, void **ppvObject ) @@ -75,7 +75,7 @@ 
NineUnknown_QueryInterface( struct NineUnknown *This, return E_NOINTERFACE; } -ULONG WINAPI +ULONG NINE_WINAPI NineUnknown_AddRef( struct NineUnknown *This ) { ULONG r; @@ -94,7 +94,7 @@ NineUnknown_AddRef( struct NineUnknown *This ) return r; } -ULONG WINAPI +ULONG NINE_WINAPI NineUnknown_Release( struct NineUnknown *This ) { if (This->forward) @@ -117,7 +117,7 @@ NineUnknown_Release( struct NineUnknown *This ) return r; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineUnknown_GetDevice( struct NineUnknown *This, IDirect3DDevice9 **ppDevice ) { diff --git a/src/gallium/state_trackers/nine/iunknown.h b/src/gallium/state_trackers/nine/iunknown.h index 628d984553e..b8de6be9663 100644 --- a/src/gallium/state_trackers/nine/iunknown.h +++ b/src/gallium/state_trackers/nine/iunknown.h @@ -28,6 +28,7 @@ #include "util/u_memory.h" #include "guid.h" +#include "nine_flags.h" #include "nine_debug.h" #include "nine_quirk.h" @@ -77,18 +78,18 @@ NineUnknown_dtor( struct NineUnknown *This ); /*** Direct3D public methods ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineUnknown_QueryInterface( struct NineUnknown *This, REFIID riid, void **ppvObject ); -ULONG WINAPI +ULONG NINE_WINAPI NineUnknown_AddRef( struct NineUnknown *This ); -ULONG WINAPI +ULONG NINE_WINAPI NineUnknown_Release( struct NineUnknown *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineUnknown_GetDevice( struct NineUnknown *This, IDirect3DDevice9 **ppDevice ); diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index a5466a7bdd4..d5daabd6661 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -24,8 +24,6 @@ #include "util/u_hash_table.h" #include "util/u_upload_mgr.h" -#define NINE_TGSI_LAZY_DEVS 1 - #define DBG_CHANNEL DBG_FF #define NINE_FF_NUM_VS_CONST 256 @@ -319,15 +317,11 @@ ureg_normalize3(struct ureg_program *ureg, struct ureg_dst dst, struct ureg_src src, struct ureg_dst tmp) { -#ifdef NINE_TGSI_LAZY_DEVS struct 
ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); ureg_DP3(ureg, tmp_x, src, src); ureg_RSQ(ureg, tmp_x, _X(tmp)); ureg_MUL(ureg, dst, src, _X(tmp)); -#else - ureg_NRM(ureg, dst, src); -#endif } static void * @@ -549,34 +543,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) */ if (key->vertexpointsize) { struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); -#ifdef NINE_TGSI_LAZY_DEVS - struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg); - - ureg_MAX(ureg, tmp_clamp, vs->aPsz, _XXXX(cPsz1)); - ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1)); - ureg_release_temporary(ureg, tmp_clamp); -#else - ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1)); -#endif + ureg_MAX(ureg, tmp_x, _XXXX(vs->aPsz), _XXXX(cPsz1)); + ureg_MIN(ureg, oPsz, _X(tmp), _YYYY(cPsz1)); } else if (key->pointscale) { struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1])); - ureg_SQRT(ureg, tmp_y, _X(tmp)); + ureg_RSQ(ureg, tmp_y, _X(tmp)); + ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); + ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); ureg_RCP(ureg, tmp_x, ureg_src(tmp)); ureg_MUL(ureg, tmp_x, ureg_src(tmp), _ZZZZ(cPsz1)); -#ifdef NINE_TGSI_LAZY_DEVS - struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg); - - ureg_MAX(ureg, tmp_clamp, _X(tmp), _XXXX(cPsz1)); - ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1)); - ureg_release_temporary(ureg, tmp_clamp); -#else - ureg_CLAMP(ureg, oPsz, _X(tmp), _XXXX(cPsz1), _YYYY(cPsz1)); -#endif + ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); + ureg_MIN(ureg, oPsz, _X(tmp), _YYYY(cPsz1)); } for (i = 0; i < 8; ++i) { diff --git a/src/gallium/state_trackers/nine/nine_flags.h b/src/gallium/state_trackers/nine/nine_flags.h new file mode 100644 index 
00000000000..61686a7f60f --- /dev/null +++ b/src/gallium/state_trackers/nine/nine_flags.h @@ -0,0 +1,14 @@ + + +#ifndef _NINE_FLAGS_H_ +#define _NINE_FLAGS_H_ + +#include "pipe/p_compiler.h" + +/* Incoming 32 bits calls are 4-byte aligned. + * We need to realign them to be able to use + * SSE and to work with other libraries (llvm, etc) + */ +#define NINE_WINAPI WINAPI PIPE_ALIGN_STACK + +#endif /* _NINE_FLAGS_H_ */
\ No newline at end of file diff --git a/src/gallium/state_trackers/nine/nine_lock.c b/src/gallium/state_trackers/nine/nine_lock.c index 42cbb0589c4..6e15cc190e8 100644 --- a/src/gallium/state_trackers/nine/nine_lock.c +++ b/src/gallium/state_trackers/nine/nine_lock.c @@ -49,7 +49,7 @@ /* Global mutex as described by MSDN */ pipe_static_mutex(d3dlock_global); -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 *This, UINT *pCertificateSize ) { @@ -60,7 +60,7 @@ LockAuthenticatedChannel9_GetCertificateSize( struct NineAuthenticatedChannel9 * return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This, UINT CertifacteSize, BYTE *ppCertificate ) @@ -72,7 +72,7 @@ LockAuthenticatedChannel9_GetCertificate( struct NineAuthenticatedChannel9 *This return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 *This, UINT DataSize, void *pData ) @@ -84,7 +84,7 @@ LockAuthenticatedChannel9_NegotiateKeyExchange( struct NineAuthenticatedChannel9 return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, @@ -98,7 +98,7 @@ LockAuthenticatedChannel9_Query( struct NineAuthenticatedChannel9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockAuthenticatedChannel9_Configure( struct NineAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, @@ -123,7 +123,7 @@ IDirect3DAuthenticatedChannel9Vtbl LockAuthenticatedChannel9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockResource9_GetDevice( struct NineResource9 *This, IDirect3DDevice9 **ppDevice ) { @@ -135,7 +135,7 @@ LockResource9_GetDevice( struct NineResource9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI 
LockResource9_SetPrivateData( struct NineResource9 *This, REFGUID refguid, const void *pData, @@ -149,7 +149,7 @@ LockResource9_SetPrivateData( struct NineResource9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockResource9_GetPrivateData( struct NineResource9 *This, REFGUID refguid, void *pData, @@ -162,7 +162,7 @@ LockResource9_GetPrivateData( struct NineResource9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockResource9_FreePrivateData( struct NineResource9 *This, REFGUID refguid ) { @@ -173,7 +173,7 @@ LockResource9_FreePrivateData( struct NineResource9 *This, return r; } -static DWORD WINAPI +static DWORD NINE_WINAPI LockResource9_SetPriority( struct NineResource9 *This, DWORD PriorityNew ) { @@ -184,7 +184,7 @@ LockResource9_SetPriority( struct NineResource9 *This, return r; } -static DWORD WINAPI +static DWORD NINE_WINAPI LockResource9_GetPriority( struct NineResource9 *This ) { DWORD r; @@ -195,7 +195,7 @@ LockResource9_GetPriority( struct NineResource9 *This ) } #if 0 -static void WINAPI +static void NINE_WINAPI LockResource9_PreLoad( struct NineResource9 *This ) { pipe_mutex_lock(d3dlock_global); @@ -205,7 +205,7 @@ LockResource9_PreLoad( struct NineResource9 *This ) #endif #if 0 -static D3DRESOURCETYPE WINAPI +static D3DRESOURCETYPE NINE_WINAPI LockResource9_GetType( struct NineResource9 *This ) { D3DRESOURCETYPE r; @@ -216,7 +216,7 @@ LockResource9_GetType( struct NineResource9 *This ) } #endif -static DWORD WINAPI +static DWORD NINE_WINAPI LockBaseTexture9_SetLOD( struct NineBaseTexture9 *This, DWORD LODNew ) { @@ -227,7 +227,7 @@ LockBaseTexture9_SetLOD( struct NineBaseTexture9 *This, return r; } -static DWORD WINAPI +static DWORD NINE_WINAPI LockBaseTexture9_GetLOD( struct NineBaseTexture9 *This ) { DWORD r; @@ -237,7 +237,7 @@ LockBaseTexture9_GetLOD( struct NineBaseTexture9 *This ) return r; } -static DWORD WINAPI +static DWORD NINE_WINAPI LockBaseTexture9_GetLevelCount( struct NineBaseTexture9 
*This ) { DWORD r; @@ -247,7 +247,7 @@ LockBaseTexture9_GetLevelCount( struct NineBaseTexture9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, D3DTEXTUREFILTERTYPE FilterType ) { @@ -258,7 +258,7 @@ LockBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, return r; } -static D3DTEXTUREFILTERTYPE WINAPI +static D3DTEXTUREFILTERTYPE NINE_WINAPI LockBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This ) { D3DTEXTUREFILTERTYPE r; @@ -268,7 +268,7 @@ LockBaseTexture9_GetAutoGenFilterType( struct NineBaseTexture9 *This ) return r; } -static void WINAPI +static void NINE_WINAPI LockBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) { pipe_mutex_lock(d3dlock_global); @@ -276,7 +276,7 @@ LockBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) pipe_mutex_unlock(d3dlock_global); } -static void WINAPI +static void NINE_WINAPI LockBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) { pipe_mutex_lock(d3dlock_global); @@ -284,7 +284,7 @@ LockBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) pipe_mutex_unlock(d3dlock_global); } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This, UINT *pCertificateSize ) { @@ -295,7 +295,7 @@ LockCryptoSession9_GetCertificateSize( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_GetCertificate( struct NineCryptoSession9 *This, UINT CertifacteSize, BYTE *ppCertificate ) @@ -307,7 +307,7 @@ LockCryptoSession9_GetCertificate( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This, UINT DataSize, void *pData ) @@ -319,7 +319,7 @@ LockCryptoSession9_NegotiateKeyExchange( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT 
NINE_WINAPI LockCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, @@ -333,7 +333,7 @@ LockCryptoSession9_EncryptionBlt( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, @@ -349,7 +349,7 @@ LockCryptoSession9_DecryptionBlt( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch ) @@ -361,7 +361,7 @@ LockCryptoSession9_GetSurfacePitch( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This, void *pRandomNumber, UINT RandomNumberSize ) @@ -373,7 +373,7 @@ LockCryptoSession9_StartSessionKeyRefresh( struct NineCryptoSession9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This ) { HRESULT r; @@ -383,7 +383,7 @@ LockCryptoSession9_FinishSessionKeyRefresh( struct NineCryptoSession9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCryptoSession9_GetEncryptionBltKey( struct NineCryptoSession9 *This, void *pReadbackKey, UINT KeySize ) @@ -411,7 +411,7 @@ IDirect3DCryptoSession9Vtbl LockCryptoSession9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ) @@ -425,7 +425,7 @@ LockCubeTexture9_GetLevelDesc( struct NineCubeTexture9 *This, #endif #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, @@ -439,7 +439,7 @@ 
LockCubeTexture9_GetCubeMapSurface( struct NineCubeTexture9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCubeTexture9_LockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, @@ -454,7 +454,7 @@ LockCubeTexture9_LockRect( struct NineCubeTexture9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCubeTexture9_UnlockRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level ) @@ -466,7 +466,7 @@ LockCubeTexture9_UnlockRect( struct NineCubeTexture9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect ) @@ -503,7 +503,7 @@ IDirect3DCubeTexture9Vtbl LockCubeTexture9_vtable = { (void *)LockCubeTexture9_AddDirtyRect }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_TestCooperativeLevel( struct NineDevice9 *This ) { HRESULT r; @@ -513,7 +513,7 @@ LockDevice9_TestCooperativeLevel( struct NineDevice9 *This ) return r; } -static UINT WINAPI +static UINT NINE_WINAPI LockDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) { UINT r; @@ -523,7 +523,7 @@ LockDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_EvictManagedResources( struct NineDevice9 *This ) { HRESULT r; @@ -533,7 +533,7 @@ LockDevice9_EvictManagedResources( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetDirect3D( struct NineDevice9 *This, IDirect3D9 **ppD3D9 ) { @@ -545,7 +545,7 @@ LockDevice9_GetDirect3D( struct NineDevice9 *This, } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetDeviceCaps( struct NineDevice9 *This, D3DCAPS9 *pCaps ) { @@ -557,7 +557,7 @@ LockDevice9_GetDeviceCaps( struct NineDevice9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetDisplayMode( struct NineDevice9 
*This, UINT iSwapChain, D3DDISPLAYMODE *pMode ) @@ -570,7 +570,7 @@ LockDevice9_GetDisplayMode( struct NineDevice9 *This, } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetCreationParameters( struct NineDevice9 *This, D3DDEVICE_CREATION_PARAMETERS *pParameters ) { @@ -582,7 +582,7 @@ LockDevice9_GetCreationParameters( struct NineDevice9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetCursorProperties( struct NineDevice9 *This, UINT XHotSpot, UINT YHotSpot, @@ -595,7 +595,7 @@ LockDevice9_SetCursorProperties( struct NineDevice9 *This, return r; } -static void WINAPI +static void NINE_WINAPI LockDevice9_SetCursorPosition( struct NineDevice9 *This, int X, int Y, @@ -606,7 +606,7 @@ LockDevice9_SetCursorPosition( struct NineDevice9 *This, pipe_mutex_unlock(d3dlock_global); } -static BOOL WINAPI +static BOOL NINE_WINAPI LockDevice9_ShowCursor( struct NineDevice9 *This, BOOL bShow ) { @@ -617,7 +617,7 @@ LockDevice9_ShowCursor( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain ) @@ -629,7 +629,7 @@ LockDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetSwapChain( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain ) @@ -641,7 +641,7 @@ LockDevice9_GetSwapChain( struct NineDevice9 *This, return r; } -static UINT WINAPI +static UINT NINE_WINAPI LockDevice9_GetNumberOfSwapChains( struct NineDevice9 *This ) { UINT r; @@ -651,7 +651,7 @@ LockDevice9_GetNumberOfSwapChains( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_Reset( struct NineDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ) { @@ -662,7 +662,7 @@ LockDevice9_Reset( struct NineDevice9 *This, return r; 
} -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_Present( struct NineDevice9 *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -676,7 +676,7 @@ LockDevice9_Present( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetBackBuffer( struct NineDevice9 *This, UINT iSwapChain, UINT iBackBuffer, @@ -690,7 +690,7 @@ LockDevice9_GetBackBuffer( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetRasterStatus( struct NineDevice9 *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus ) @@ -702,7 +702,7 @@ LockDevice9_GetRasterStatus( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetDialogBoxMode( struct NineDevice9 *This, BOOL bEnableDialogs ) { @@ -713,7 +713,7 @@ LockDevice9_SetDialogBoxMode( struct NineDevice9 *This, return r; } -static void WINAPI +static void NINE_WINAPI LockDevice9_SetGammaRamp( struct NineDevice9 *This, UINT iSwapChain, DWORD Flags, @@ -724,7 +724,7 @@ LockDevice9_SetGammaRamp( struct NineDevice9 *This, pipe_mutex_unlock(d3dlock_global); } -static void WINAPI +static void NINE_WINAPI LockDevice9_GetGammaRamp( struct NineDevice9 *This, UINT iSwapChain, D3DGAMMARAMP *pRamp ) @@ -734,7 +734,7 @@ LockDevice9_GetGammaRamp( struct NineDevice9 *This, pipe_mutex_unlock(d3dlock_global); } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -752,7 +752,7 @@ LockDevice9_CreateTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateVolumeTexture( struct NineDevice9 *This, UINT Width, UINT Height, @@ -771,7 +771,7 @@ LockDevice9_CreateVolumeTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateCubeTexture( struct NineDevice9 *This, UINT EdgeLength, UINT Levels, @@ -788,7 +788,7 @@ 
LockDevice9_CreateCubeTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateVertexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -804,7 +804,7 @@ LockDevice9_CreateVertexBuffer( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateIndexBuffer( struct NineDevice9 *This, UINT Length, DWORD Usage, @@ -820,7 +820,7 @@ LockDevice9_CreateIndexBuffer( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateRenderTarget( struct NineDevice9 *This, UINT Width, UINT Height, @@ -838,7 +838,7 @@ LockDevice9_CreateRenderTarget( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -856,7 +856,7 @@ LockDevice9_CreateDepthStencilSurface( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_UpdateSurface( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, @@ -870,7 +870,7 @@ LockDevice9_UpdateSurface( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_UpdateTexture( struct NineDevice9 *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture ) @@ -882,7 +882,7 @@ LockDevice9_UpdateTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetRenderTargetData( struct NineDevice9 *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface ) @@ -894,7 +894,7 @@ LockDevice9_GetRenderTargetData( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetFrontBufferData( struct NineDevice9 *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface ) @@ -906,7 +906,7 @@ LockDevice9_GetFrontBufferData( struct 
NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_StretchRect( struct NineDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, @@ -921,7 +921,7 @@ LockDevice9_StretchRect( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_ColorFill( struct NineDevice9 *This, IDirect3DSurface9 *pSurface, const RECT *pRect, @@ -934,7 +934,7 @@ LockDevice9_ColorFill( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, UINT Width, UINT Height, @@ -950,7 +950,7 @@ LockDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget ) @@ -962,7 +962,7 @@ LockDevice9_SetRenderTarget( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetRenderTarget( struct NineDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget ) @@ -974,7 +974,7 @@ LockDevice9_GetRenderTarget( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 *pNewZStencil ) { @@ -985,7 +985,7 @@ LockDevice9_SetDepthStencilSurface( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetDepthStencilSurface( struct NineDevice9 *This, IDirect3DSurface9 **ppZStencilSurface ) { @@ -996,7 +996,7 @@ LockDevice9_GetDepthStencilSurface( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_BeginScene( struct NineDevice9 *This ) { HRESULT r; @@ -1006,7 +1006,7 @@ LockDevice9_BeginScene( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI 
LockDevice9_EndScene( struct NineDevice9 *This ) { HRESULT r; @@ -1016,7 +1016,7 @@ LockDevice9_EndScene( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_Clear( struct NineDevice9 *This, DWORD Count, const D3DRECT *pRects, @@ -1032,7 +1032,7 @@ LockDevice9_Clear( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ) @@ -1044,7 +1044,7 @@ LockDevice9_SetTransform( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix ) @@ -1056,7 +1056,7 @@ LockDevice9_GetTransform( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_MultiplyTransform( struct NineDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix ) @@ -1068,7 +1068,7 @@ LockDevice9_MultiplyTransform( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetViewport( struct NineDevice9 *This, const D3DVIEWPORT9 *pViewport ) { @@ -1079,7 +1079,7 @@ LockDevice9_SetViewport( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetViewport( struct NineDevice9 *This, D3DVIEWPORT9 *pViewport ) { @@ -1090,7 +1090,7 @@ LockDevice9_GetViewport( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetMaterial( struct NineDevice9 *This, const D3DMATERIAL9 *pMaterial ) { @@ -1101,7 +1101,7 @@ LockDevice9_SetMaterial( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetMaterial( struct NineDevice9 *This, D3DMATERIAL9 *pMaterial ) { @@ -1112,7 +1112,7 @@ LockDevice9_GetMaterial( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static 
HRESULT NINE_WINAPI LockDevice9_SetLight( struct NineDevice9 *This, DWORD Index, const D3DLIGHT9 *pLight ) @@ -1124,7 +1124,7 @@ LockDevice9_SetLight( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetLight( struct NineDevice9 *This, DWORD Index, D3DLIGHT9 *pLight ) @@ -1136,7 +1136,7 @@ LockDevice9_GetLight( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_LightEnable( struct NineDevice9 *This, DWORD Index, BOOL Enable ) @@ -1148,7 +1148,7 @@ LockDevice9_LightEnable( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetLightEnable( struct NineDevice9 *This, DWORD Index, BOOL *pEnable ) @@ -1160,7 +1160,7 @@ LockDevice9_GetLightEnable( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetClipPlane( struct NineDevice9 *This, DWORD Index, const float *pPlane ) @@ -1172,7 +1172,7 @@ LockDevice9_SetClipPlane( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetClipPlane( struct NineDevice9 *This, DWORD Index, float *pPlane ) @@ -1184,7 +1184,7 @@ LockDevice9_GetClipPlane( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD Value ) @@ -1196,7 +1196,7 @@ LockDevice9_SetRenderState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetRenderState( struct NineDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue ) @@ -1208,7 +1208,7 @@ LockDevice9_GetRenderState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateStateBlock( struct NineDevice9 *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB ) @@ -1220,7 +1220,7 @@ LockDevice9_CreateStateBlock( struct NineDevice9 *This, return r; } 
-static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_BeginStateBlock( struct NineDevice9 *This ) { HRESULT r; @@ -1230,7 +1230,7 @@ LockDevice9_BeginStateBlock( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_EndStateBlock( struct NineDevice9 *This, IDirect3DStateBlock9 **ppSB ) { @@ -1241,7 +1241,7 @@ LockDevice9_EndStateBlock( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetClipStatus( struct NineDevice9 *This, const D3DCLIPSTATUS9 *pClipStatus ) { @@ -1252,7 +1252,7 @@ LockDevice9_SetClipStatus( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetClipStatus( struct NineDevice9 *This, D3DCLIPSTATUS9 *pClipStatus ) { @@ -1263,7 +1263,7 @@ LockDevice9_GetClipStatus( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture ) @@ -1275,7 +1275,7 @@ LockDevice9_GetTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetTexture( struct NineDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture ) @@ -1287,7 +1287,7 @@ LockDevice9_SetTexture( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetTextureStageState( struct NineDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, @@ -1300,7 +1300,7 @@ LockDevice9_GetTextureStageState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, @@ -1313,7 +1313,7 @@ LockDevice9_SetTextureStageState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, @@ 
-1326,7 +1326,7 @@ LockDevice9_GetSamplerState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetSamplerState( struct NineDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, @@ -1339,7 +1339,7 @@ LockDevice9_SetSamplerState( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_ValidateDevice( struct NineDevice9 *This, DWORD *pNumPasses ) { @@ -1350,7 +1350,7 @@ LockDevice9_ValidateDevice( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, const PALETTEENTRY *pEntries ) @@ -1362,7 +1362,7 @@ LockDevice9_SetPaletteEntries( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetPaletteEntries( struct NineDevice9 *This, UINT PaletteNumber, PALETTEENTRY *pEntries ) @@ -1374,7 +1374,7 @@ LockDevice9_GetPaletteEntries( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetCurrentTexturePalette( struct NineDevice9 *This, UINT PaletteNumber ) { @@ -1385,7 +1385,7 @@ LockDevice9_SetCurrentTexturePalette( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetCurrentTexturePalette( struct NineDevice9 *This, UINT *PaletteNumber ) { @@ -1396,7 +1396,7 @@ LockDevice9_GetCurrentTexturePalette( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetScissorRect( struct NineDevice9 *This, const RECT *pRect ) { @@ -1407,7 +1407,7 @@ LockDevice9_SetScissorRect( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetScissorRect( struct NineDevice9 *This, RECT *pRect ) { @@ -1418,7 +1418,7 @@ LockDevice9_GetScissorRect( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI 
LockDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This, BOOL bSoftware ) { @@ -1429,7 +1429,7 @@ LockDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This, return r; } -static BOOL WINAPI +static BOOL NINE_WINAPI LockDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This ) { BOOL r; @@ -1439,7 +1439,7 @@ LockDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetNPatchMode( struct NineDevice9 *This, float nSegments ) { @@ -1450,7 +1450,7 @@ LockDevice9_SetNPatchMode( struct NineDevice9 *This, return r; } -static float WINAPI +static float NINE_WINAPI LockDevice9_GetNPatchMode( struct NineDevice9 *This ) { float r; @@ -1460,7 +1460,7 @@ LockDevice9_GetNPatchMode( struct NineDevice9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, @@ -1473,7 +1473,7 @@ LockDevice9_DrawPrimitive( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, @@ -1489,7 +1489,7 @@ LockDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, @@ -1503,7 +1503,7 @@ LockDevice9_DrawPrimitiveUP( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, @@ -1521,7 +1521,7 @@ LockDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_ProcessVertices( struct NineDevice9 *This, UINT SrcStartIndex, UINT DestIndex, @@ -1537,7 +1537,7 @@ 
LockDevice9_ProcessVertices( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateVertexDeclaration( struct NineDevice9 *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl ) @@ -1549,7 +1549,7 @@ LockDevice9_CreateVertexDeclaration( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pDecl ) { @@ -1560,7 +1560,7 @@ LockDevice9_SetVertexDeclaration( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetVertexDeclaration( struct NineDevice9 *This, IDirect3DVertexDeclaration9 **ppDecl ) { @@ -1571,7 +1571,7 @@ LockDevice9_GetVertexDeclaration( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetFVF( struct NineDevice9 *This, DWORD FVF ) { @@ -1582,7 +1582,7 @@ LockDevice9_SetFVF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetFVF( struct NineDevice9 *This, DWORD *pFVF ) { @@ -1593,7 +1593,7 @@ LockDevice9_GetFVF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateVertexShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader ) @@ -1605,7 +1605,7 @@ LockDevice9_CreateVertexShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 *pShader ) { @@ -1616,7 +1616,7 @@ LockDevice9_SetVertexShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetVertexShader( struct NineDevice9 *This, IDirect3DVertexShader9 **ppShader ) { @@ -1627,7 +1627,7 @@ LockDevice9_GetVertexShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT 
NINE_WINAPI LockDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, @@ -1640,7 +1640,7 @@ LockDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, UINT StartRegister, float *pConstantData, @@ -1653,7 +1653,7 @@ LockDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, @@ -1666,7 +1666,7 @@ LockDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, @@ -1679,7 +1679,7 @@ LockDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, @@ -1692,7 +1692,7 @@ LockDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, @@ -1705,7 +1705,7 @@ LockDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, @@ -1719,7 +1719,7 @@ LockDevice9_SetStreamSource( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetStreamSource( struct NineDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, @@ -1733,7 +1733,7 @@ LockDevice9_GetStreamSource( struct NineDevice9 *This, 
return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetStreamSourceFreq( struct NineDevice9 *This, UINT StreamNumber, UINT Setting ) @@ -1745,7 +1745,7 @@ LockDevice9_SetStreamSourceFreq( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetStreamSourceFreq( struct NineDevice9 *This, UINT StreamNumber, UINT *pSetting ) @@ -1757,7 +1757,7 @@ LockDevice9_GetStreamSourceFreq( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 *pIndexData ) { @@ -1768,7 +1768,7 @@ LockDevice9_SetIndices( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetIndices( struct NineDevice9 *This, IDirect3DIndexBuffer9 **ppIndexData ) { @@ -1779,7 +1779,7 @@ LockDevice9_GetIndices( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreatePixelShader( struct NineDevice9 *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader ) @@ -1791,7 +1791,7 @@ LockDevice9_CreatePixelShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 *pShader ) { @@ -1802,7 +1802,7 @@ LockDevice9_SetPixelShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetPixelShader( struct NineDevice9 *This, IDirect3DPixelShader9 **ppShader ) { @@ -1813,7 +1813,7 @@ LockDevice9_GetPixelShader( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, UINT StartRegister, const float *pConstantData, @@ -1826,7 +1826,7 @@ LockDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetPixelShaderConstantF( 
struct NineDevice9 *This, UINT StartRegister, float *pConstantData, @@ -1839,7 +1839,7 @@ LockDevice9_GetPixelShaderConstantF( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, const int *pConstantData, @@ -1852,7 +1852,7 @@ LockDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetPixelShaderConstantI( struct NineDevice9 *This, UINT StartRegister, int *pConstantData, @@ -1865,7 +1865,7 @@ LockDevice9_GetPixelShaderConstantI( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, const BOOL *pConstantData, @@ -1878,7 +1878,7 @@ LockDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_GetPixelShaderConstantB( struct NineDevice9 *This, UINT StartRegister, BOOL *pConstantData, @@ -1891,7 +1891,7 @@ LockDevice9_GetPixelShaderConstantB( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawRectPatch( struct NineDevice9 *This, UINT Handle, const float *pNumSegs, @@ -1904,7 +1904,7 @@ LockDevice9_DrawRectPatch( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DrawTriPatch( struct NineDevice9 *This, UINT Handle, const float *pNumSegs, @@ -1917,7 +1917,7 @@ LockDevice9_DrawTriPatch( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_DeletePatch( struct NineDevice9 *This, UINT Handle ) { @@ -1928,7 +1928,7 @@ LockDevice9_DeletePatch( struct NineDevice9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9_CreateQuery( struct NineDevice9 *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery ) @@ -2062,7 
+2062,7 @@ IDirect3DDevice9Vtbl LockDevice9_vtable = { (void *)LockDevice9_CreateQuery }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, UINT width, UINT height, @@ -2076,7 +2076,7 @@ LockDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_ComposeRects( struct NineDevice9Ex *This, IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, @@ -2094,7 +2094,7 @@ LockDevice9Ex_ComposeRects( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_PresentEx( struct NineDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -2109,7 +2109,7 @@ LockDevice9Ex_PresentEx( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This, INT *pPriority ) { @@ -2120,7 +2120,7 @@ LockDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This, INT Priority ) { @@ -2131,7 +2131,7 @@ LockDevice9Ex_SetGPUThreadPriority( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This, UINT iSwapChain ) { @@ -2142,7 +2142,7 @@ LockDevice9Ex_WaitForVBlank( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This, IDirect3DResource9 **pResourceArray, UINT32 NumResources ) @@ -2154,7 +2154,7 @@ LockDevice9Ex_CheckResourceResidency( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex *This, UINT MaxLatency ) { @@ -2165,7 +2165,7 @@ LockDevice9Ex_SetMaximumFrameLatency( struct NineDevice9Ex 
*This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This, UINT *pMaxLatency ) { @@ -2176,7 +2176,7 @@ LockDevice9Ex_GetMaximumFrameLatency( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, HWND hDestinationWindow ) { @@ -2187,7 +2187,7 @@ LockDevice9Ex_CheckDeviceState( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -2206,7 +2206,7 @@ LockDevice9Ex_CreateRenderTargetEx( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -2223,7 +2223,7 @@ LockDevice9Ex_CreateOffscreenPlainSurfaceEx( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, UINT Width, UINT Height, @@ -2242,7 +2242,7 @@ LockDevice9Ex_CreateDepthStencilSurfaceEx( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_ResetEx( struct NineDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode ) @@ -2254,7 +2254,7 @@ LockDevice9Ex_ResetEx( struct NineDevice9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, @@ -2404,7 +2404,7 @@ IDirect3DDevice9ExVtbl LockDevice9Ex_vtable = { (void *)LockDevice9Ex_GetDisplayModeEx }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, @@ -2417,7 +2417,7 @@ 
LockDevice9Video_GetContentProtectionCaps( struct NineDevice9Video *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This, D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, @@ -2430,7 +2430,7 @@ LockDevice9Video_CreateAuthenticatedChannel( struct NineDevice9Video *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockDevice9Video_CreateCryptoSession( struct NineDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, @@ -2453,7 +2453,7 @@ IDirect3DDevice9VideoVtbl LockDevice9Video_vtable = { (void *)LockDevice9Video_CreateCryptoSession }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockIndexBuffer9_Lock( struct NineIndexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, @@ -2467,7 +2467,7 @@ LockIndexBuffer9_Lock( struct NineIndexBuffer9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockIndexBuffer9_Unlock( struct NineIndexBuffer9 *This ) { HRESULT r; @@ -2478,7 +2478,7 @@ LockIndexBuffer9_Unlock( struct NineIndexBuffer9 *This ) } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockIndexBuffer9_GetDesc( struct NineIndexBuffer9 *This, D3DINDEXBUFFER_DESC *pDesc ) { @@ -2508,7 +2508,7 @@ IDirect3DIndexBuffer9Vtbl LockIndexBuffer9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockPixelShader9_GetDevice( struct NinePixelShader9 *This, IDirect3DDevice9 **ppDevice ) { @@ -2520,7 +2520,7 @@ LockPixelShader9_GetDevice( struct NinePixelShader9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockPixelShader9_GetFunction( struct NinePixelShader9 *This, void *pData, UINT *pSizeOfData ) @@ -2541,7 +2541,7 @@ IDirect3DPixelShader9Vtbl LockPixelShader9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockQuery9_GetDevice( struct NineQuery9 *This, IDirect3DDevice9 **ppDevice ) { @@ -2554,7 +2554,7 @@ 
LockQuery9_GetDevice( struct NineQuery9 *This, #endif #if 0 -static D3DQUERYTYPE WINAPI +static D3DQUERYTYPE NINE_WINAPI LockQuery9_GetType( struct NineQuery9 *This ) { D3DQUERYTYPE r; @@ -2566,7 +2566,7 @@ LockQuery9_GetType( struct NineQuery9 *This ) #endif #if 0 -static DWORD WINAPI +static DWORD NINE_WINAPI LockQuery9_GetDataSize( struct NineQuery9 *This ) { DWORD r; @@ -2577,7 +2577,7 @@ LockQuery9_GetDataSize( struct NineQuery9 *This ) } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockQuery9_Issue( struct NineQuery9 *This, DWORD dwIssueFlags ) { @@ -2588,7 +2588,7 @@ LockQuery9_Issue( struct NineQuery9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockQuery9_GetData( struct NineQuery9 *This, void *pData, DWORD dwSize, @@ -2613,7 +2613,7 @@ IDirect3DQuery9Vtbl LockQuery9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockStateBlock9_GetDevice( struct NineStateBlock9 *This, IDirect3DDevice9 **ppDevice ) { @@ -2625,7 +2625,7 @@ LockStateBlock9_GetDevice( struct NineStateBlock9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockStateBlock9_Capture( struct NineStateBlock9 *This ) { HRESULT r; @@ -2635,7 +2635,7 @@ LockStateBlock9_Capture( struct NineStateBlock9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockStateBlock9_Apply( struct NineStateBlock9 *This ) { HRESULT r; @@ -2654,7 +2654,7 @@ IDirect3DStateBlock9Vtbl LockStateBlock9_vtable = { (void *)LockStateBlock9_Apply }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSurface9_GetContainer( struct NineSurface9 *This, REFIID riid, void **ppContainer ) @@ -2667,7 +2667,7 @@ LockSurface9_GetContainer( struct NineSurface9 *This, } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSurface9_GetDesc( struct NineSurface9 *This, D3DSURFACE_DESC *pDesc ) { @@ -2679,7 +2679,7 @@ LockSurface9_GetDesc( struct NineSurface9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI 
LockSurface9_LockRect( struct NineSurface9 *This, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, @@ -2692,7 +2692,7 @@ LockSurface9_LockRect( struct NineSurface9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSurface9_UnlockRect( struct NineSurface9 *This ) { HRESULT r; @@ -2702,7 +2702,7 @@ LockSurface9_UnlockRect( struct NineSurface9 *This ) return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSurface9_GetDC( struct NineSurface9 *This, HDC *phdc ) { @@ -2713,7 +2713,7 @@ LockSurface9_GetDC( struct NineSurface9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSurface9_ReleaseDC( struct NineSurface9 *This, HDC hdc ) { @@ -2744,7 +2744,7 @@ IDirect3DSurface9Vtbl LockSurface9_vtable = { (void *)LockSurface9_ReleaseDC }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_Present( struct NineSwapChain9 *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -2759,7 +2759,7 @@ LockSwapChain9_Present( struct NineSwapChain9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, IDirect3DSurface9 *pDestSurface ) { @@ -2770,7 +2770,7 @@ LockSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, @@ -2783,7 +2783,7 @@ LockSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetRasterStatus( struct NineSwapChain9 *This, D3DRASTER_STATUS *pRasterStatus ) { @@ -2794,7 +2794,7 @@ LockSwapChain9_GetRasterStatus( struct NineSwapChain9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetDisplayMode( struct NineSwapChain9 *This, D3DDISPLAYMODE *pMode ) { @@ -2806,7 +2806,7 @@ LockSwapChain9_GetDisplayMode( struct NineSwapChain9 *This, } #if 0 
-static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetDevice( struct NineSwapChain9 *This, IDirect3DDevice9 **ppDevice ) { @@ -2818,7 +2818,7 @@ LockSwapChain9_GetDevice( struct NineSwapChain9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9_GetPresentParameters( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ) { @@ -2842,7 +2842,7 @@ IDirect3DSwapChain9Vtbl LockSwapChain9_vtable = { (void *)LockSwapChain9_GetPresentParameters }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This, UINT *pLastPresentCount ) { @@ -2853,7 +2853,7 @@ LockSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This, D3DPRESENTSTATS *pPresentationStatistics ) { @@ -2864,7 +2864,7 @@ LockSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation ) @@ -2893,7 +2893,7 @@ IDirect3DSwapChain9ExVtbl LockSwapChain9Ex_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockTexture9_GetLevelDesc( struct NineTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ) @@ -2907,7 +2907,7 @@ LockTexture9_GetLevelDesc( struct NineTexture9 *This, #endif #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockTexture9_GetSurfaceLevel( struct NineTexture9 *This, UINT Level, IDirect3DSurface9 **ppSurfaceLevel ) @@ -2920,7 +2920,7 @@ LockTexture9_GetSurfaceLevel( struct NineTexture9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockTexture9_LockRect( struct NineTexture9 *This, UINT Level, D3DLOCKED_RECT *pLockedRect, @@ -2934,7 +2934,7 @@ LockTexture9_LockRect( struct NineTexture9 *This, return r; } -static HRESULT 
WINAPI +static HRESULT NINE_WINAPI LockTexture9_UnlockRect( struct NineTexture9 *This, UINT Level ) { @@ -2945,7 +2945,7 @@ LockTexture9_UnlockRect( struct NineTexture9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockTexture9_AddDirtyRect( struct NineTexture9 *This, const RECT *pDirtyRect ) { @@ -2981,7 +2981,7 @@ IDirect3DTexture9Vtbl LockTexture9_vtable = { (void *)LockTexture9_AddDirtyRect }; -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexBuffer9_Lock( struct NineVertexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, @@ -2995,7 +2995,7 @@ LockVertexBuffer9_Lock( struct NineVertexBuffer9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexBuffer9_Unlock( struct NineVertexBuffer9 *This ) { HRESULT r; @@ -3006,7 +3006,7 @@ LockVertexBuffer9_Unlock( struct NineVertexBuffer9 *This ) } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This, D3DVERTEXBUFFER_DESC *pDesc ) { @@ -3036,7 +3036,7 @@ IDirect3DVertexBuffer9Vtbl LockVertexBuffer9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexDeclaration9_GetDevice( struct NineVertexDeclaration9 *This, IDirect3DDevice9 **ppDevice ) { @@ -3048,7 +3048,7 @@ LockVertexDeclaration9_GetDevice( struct NineVertexDeclaration9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This, D3DVERTEXELEMENT9 *pElement, UINT *pNumElements ) @@ -3069,7 +3069,7 @@ IDirect3DVertexDeclaration9Vtbl LockVertexDeclaration9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexShader9_GetDevice( struct NineVertexShader9 *This, IDirect3DDevice9 **ppDevice ) { @@ -3081,7 +3081,7 @@ LockVertexShader9_GetDevice( struct NineVertexShader9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVertexShader9_GetFunction( struct NineVertexShader9 *This, void *pData, UINT 
*pSizeOfData ) @@ -3102,7 +3102,7 @@ IDirect3DVertexShader9Vtbl LockVertexShader9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_GetDevice( struct NineVolume9 *This, IDirect3DDevice9 **ppDevice ) { @@ -3114,7 +3114,7 @@ LockVolume9_GetDevice( struct NineVolume9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_SetPrivateData( struct NineVolume9 *This, REFGUID refguid, const void *pData, @@ -3128,7 +3128,7 @@ LockVolume9_SetPrivateData( struct NineVolume9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_GetPrivateData( struct NineVolume9 *This, REFGUID refguid, void *pData, @@ -3141,7 +3141,7 @@ LockVolume9_GetPrivateData( struct NineVolume9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_FreePrivateData( struct NineVolume9 *This, REFGUID refguid ) { @@ -3152,7 +3152,7 @@ LockVolume9_FreePrivateData( struct NineVolume9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_GetContainer( struct NineVolume9 *This, REFIID riid, void **ppContainer ) @@ -3165,7 +3165,7 @@ LockVolume9_GetContainer( struct NineVolume9 *This, } #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_GetDesc( struct NineVolume9 *This, D3DVOLUME_DESC *pDesc ) { @@ -3177,7 +3177,7 @@ LockVolume9_GetDesc( struct NineVolume9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_LockBox( struct NineVolume9 *This, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, @@ -3190,7 +3190,7 @@ LockVolume9_LockBox( struct NineVolume9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolume9_UnlockBox( struct NineVolume9 *This ) { HRESULT r; @@ -3215,7 +3215,7 @@ IDirect3DVolume9Vtbl LockVolume9_vtable = { }; #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, UINT Level, D3DVOLUME_DESC *pDesc ) @@ -3229,7 +3229,7 @@ 
LockVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, #endif #if 0 -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, UINT Level, IDirect3DVolume9 **ppVolumeLevel ) @@ -3242,7 +3242,7 @@ LockVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, } #endif -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, UINT Level, D3DLOCKED_BOX *pLockedVolume, @@ -3256,7 +3256,7 @@ LockVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This, UINT Level ) { @@ -3267,7 +3267,7 @@ LockVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This, return r; } -static HRESULT WINAPI +static HRESULT NINE_WINAPI LockVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, const D3DBOX *pDirtyBox ) { diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h index 86117866ed5..1ffce7dc1d7 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.h +++ b/src/gallium/state_trackers/nine/nine_pipe.h @@ -324,7 +324,8 @@ d3d9_to_pipe_format_checked(struct pipe_screen *screen, enum pipe_texture_target target, unsigned sample_count, unsigned bindings, - boolean srgb) + boolean srgb, + boolean bypass_check) { enum pipe_format result; @@ -335,7 +336,10 @@ d3d9_to_pipe_format_checked(struct pipe_screen *screen, if (srgb) result = util_format_srgb(result); - if (format_check_internal(result)) + /* bypass_check: Used for D3DPOOL_SCRATCH, which + * isn't limited to the formats supported by the + * device. 
*/ + if (bypass_check || format_check_internal(result)) return result; /* fallback to another format for formats diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 6f94e378984..f8af969f283 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -23,6 +23,7 @@ #include "device9.h" #include "basetexture9.h" +#include "buffer9.h" #include "indexbuffer9.h" #include "surface9.h" #include "vertexdeclaration9.h" @@ -935,6 +936,16 @@ validate_textures(struct NineDevice9 *device) } } +static void +update_managed_buffers(struct NineDevice9 *device) +{ + struct NineBuffer9 *buf, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) { + list_delinit(&buf->managed.list); + NineBuffer9_Upload(buf); + } +} + void nine_update_state_framebuffer_clear(struct NineDevice9 *device) { @@ -962,6 +973,7 @@ nine_update_state(struct NineDevice9 *device) * may be dirty anyway, even if no texture bindings changed. 
*/ validate_textures(device); /* may clobber state */ + update_managed_buffers(device); /* ff_update may change VS/PS dirty bits */ if (unlikely(!state->programmable_vs || !state->ps)) diff --git a/src/gallium/state_trackers/nine/nineexoverlayextension.c b/src/gallium/state_trackers/nine/nineexoverlayextension.c index 2253f8d9789..6a4f287e9a3 100644 --- a/src/gallium/state_trackers/nine/nineexoverlayextension.c +++ b/src/gallium/state_trackers/nine/nineexoverlayextension.c @@ -24,7 +24,7 @@ #define DBG_CHANNEL DBG_OVERLAYEXTENSION -HRESULT WINAPI +HRESULT NINE_WINAPI Nine9ExOverlayExtension_CheckDeviceOverlayType( struct Nine9ExOverlayExtension *This, UINT Adapter, D3DDEVTYPE DevType, diff --git a/src/gallium/state_trackers/nine/nineexoverlayextension.h b/src/gallium/state_trackers/nine/nineexoverlayextension.h index 1616ed0532c..f528f1d3ca9 100644 --- a/src/gallium/state_trackers/nine/nineexoverlayextension.h +++ b/src/gallium/state_trackers/nine/nineexoverlayextension.h @@ -35,7 +35,7 @@ Nine9ExOverlayExtension( void *data ) return (struct Nine9ExOverlayExtension *)data; } -HRESULT WINAPI +HRESULT NINE_WINAPI Nine9ExOverlayExtension_CheckDeviceOverlayType( struct Nine9ExOverlayExtension *This, UINT Adapter, D3DDEVTYPE DevType, diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 00be67f8955..541919c130c 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -116,7 +116,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This ) NineUnknown_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NinePixelShader9_GetFunction( struct NinePixelShader9 *This, void *pData, UINT *pSizeOfData ) diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index 6b431813a81..1fef1c47dd1 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ b/src/gallium/state_trackers/nine/pixelshader9.h @@ 
-126,7 +126,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *, void NinePixelShader9_dtor( struct NinePixelShader9 * ); -HRESULT WINAPI +HRESULT NINE_WINAPI NinePixelShader9_GetFunction( struct NinePixelShader9 *This, void *pData, UINT *pSizeOfData ); diff --git a/src/gallium/state_trackers/nine/query9.c b/src/gallium/state_trackers/nine/query9.c index 3afa9007f61..39ac831ba77 100644 --- a/src/gallium/state_trackers/nine/query9.c +++ b/src/gallium/state_trackers/nine/query9.c @@ -143,19 +143,19 @@ NineQuery9_dtor( struct NineQuery9 *This ) NineUnknown_dtor(&This->base); } -D3DQUERYTYPE WINAPI +D3DQUERYTYPE NINE_WINAPI NineQuery9_GetType( struct NineQuery9 *This ) { return This->type; } -DWORD WINAPI +DWORD NINE_WINAPI NineQuery9_GetDataSize( struct NineQuery9 *This ) { return This->result_size; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineQuery9_Issue( struct NineQuery9 *This, DWORD dwIssueFlags ) { @@ -197,7 +197,7 @@ union nine_query_result UINT64 u64; }; -HRESULT WINAPI +HRESULT NINE_WINAPI NineQuery9_GetData( struct NineQuery9 *This, void *pData, DWORD dwSize, diff --git a/src/gallium/state_trackers/nine/query9.h b/src/gallium/state_trackers/nine/query9.h index 9cc1e317055..6397a4c0f6c 100644 --- a/src/gallium/state_trackers/nine/query9.h +++ b/src/gallium/state_trackers/nine/query9.h @@ -63,17 +63,17 @@ NineQuery9_ctor( struct NineQuery9 *, void NineQuery9_dtor( struct NineQuery9 * ); -D3DQUERYTYPE WINAPI +D3DQUERYTYPE NINE_WINAPI NineQuery9_GetType( struct NineQuery9 *This ); -DWORD WINAPI +DWORD NINE_WINAPI NineQuery9_GetDataSize( struct NineQuery9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineQuery9_Issue( struct NineQuery9 *This, DWORD dwIssueFlags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineQuery9_GetData( struct NineQuery9 *This, void *pData, DWORD dwSize, diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c index b929c50a83c..d01c2225520 100644 --- a/src/gallium/state_trackers/nine/resource9.c +++ 
b/src/gallium/state_trackers/nine/resource9.c @@ -138,7 +138,7 @@ NineResource9_GetPool( struct NineResource9 *This ) return This->pool; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_SetPrivateData( struct NineResource9 *This, REFGUID refguid, const void *pData, @@ -187,7 +187,7 @@ NineResource9_SetPrivateData( struct NineResource9 *This, return D3DERR_DRIVERINTERNALERROR; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_GetPrivateData( struct NineResource9 *This, REFGUID refguid, void *pData, @@ -220,7 +220,7 @@ NineResource9_GetPrivateData( struct NineResource9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_FreePrivateData( struct NineResource9 *This, REFGUID refguid ) { @@ -239,7 +239,7 @@ NineResource9_FreePrivateData( struct NineResource9 *This, return D3D_OK; } -DWORD WINAPI +DWORD NINE_WINAPI NineResource9_SetPriority( struct NineResource9 *This, DWORD PriorityNew ) { @@ -254,7 +254,7 @@ NineResource9_SetPriority( struct NineResource9 *This, return prev; } -DWORD WINAPI +DWORD NINE_WINAPI NineResource9_GetPriority( struct NineResource9 *This ) { if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE) @@ -264,7 +264,7 @@ NineResource9_GetPriority( struct NineResource9 *This ) } /* NOTE: Don't forget to adjust locked vtable if you change this ! 
*/ -void WINAPI +void NINE_WINAPI NineResource9_PreLoad( struct NineResource9 *This ) { if (This->pool != D3DPOOL_MANAGED) @@ -275,7 +275,7 @@ NineResource9_PreLoad( struct NineResource9 *This ) */ } -D3DRESOURCETYPE WINAPI +D3DRESOURCETYPE NINE_WINAPI NineResource9_GetType( struct NineResource9 *This ) { return This->type; diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h index 8122257b7a7..24a241c00cd 100644 --- a/src/gallium/state_trackers/nine/resource9.h +++ b/src/gallium/state_trackers/nine/resource9.h @@ -76,34 +76,34 @@ NineResource9_GetPool( struct NineResource9 *This ); /*** Direct3D public methods ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_SetPrivateData( struct NineResource9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_GetPrivateData( struct NineResource9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineResource9_FreePrivateData( struct NineResource9 *This, REFGUID refguid ); -DWORD WINAPI +DWORD NINE_WINAPI NineResource9_SetPriority( struct NineResource9 *This, DWORD PriorityNew ); -DWORD WINAPI +DWORD NINE_WINAPI NineResource9_GetPriority( struct NineResource9 *This ); -void WINAPI +void NINE_WINAPI NineResource9_PreLoad( struct NineResource9 *This ); -D3DRESOURCETYPE WINAPI +D3DRESOURCETYPE NINE_WINAPI NineResource9_GetType( struct NineResource9 *This ); #endif /* _NINE_RESOURCE9_H_ */ diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 0d1a04b657a..cc368b7f725 100644 --- a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -86,7 +86,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This ) */ static void nine_state_copy_common(struct nine_state *dst, - const struct nine_state *src, + struct nine_state *src, struct nine_state *mask, /* aliases 
either src or dst */ const boolean apply, struct nine_range_pool *pool) @@ -267,17 +267,41 @@ nine_state_copy_common(struct nine_state *dst, } } if (mask->changed.group & NINE_STATE_FF_LIGHTING) { - if (dst->ff.num_lights < mask->ff.num_lights) { + unsigned num_lights = MAX2(dst->ff.num_lights, src->ff.num_lights); + /* Can happen in Capture() if device state has created new lights after + * the stateblock was created. + * Can happen in Apply() if the stateblock had recorded the creation of + * new lights. */ + if (dst->ff.num_lights < num_lights) { dst->ff.light = REALLOC(dst->ff.light, dst->ff.num_lights * sizeof(D3DLIGHT9), - mask->ff.num_lights * sizeof(D3DLIGHT9)); - for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) { - memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9)); - dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; + num_lights * sizeof(D3DLIGHT9)); + memset(&dst->ff.light[dst->ff.num_lights], 0, (num_lights - dst->ff.num_lights) * sizeof(D3DLIGHT9)); + /* if mask == dst, a Type of 0 will trigger + * "dst->ff.light[i] = src->ff.light[i];" later, + * which is what we want in that case. */ + if (mask != dst) { + for (i = dst->ff.num_lights; i < num_lights; ++i) + dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; } - dst->ff.num_lights = mask->ff.num_lights; + dst->ff.num_lights = num_lights; } - for (i = 0; i < mask->ff.num_lights; ++i) + /* Can happen in Capture() if the stateblock had recorded the creation of + * new lights. + * Can happen in Apply() if device state has created new lights after + * the stateblock was created.
*/ + if (src->ff.num_lights < num_lights) { + src->ff.light = REALLOC(src->ff.light, + src->ff.num_lights * sizeof(D3DLIGHT9), + num_lights * sizeof(D3DLIGHT9)); + memset(&src->ff.light[src->ff.num_lights], 0, (num_lights - src->ff.num_lights) * sizeof(D3DLIGHT9)); + for (i = src->ff.num_lights; i < num_lights; ++i) + src->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; + src->ff.num_lights = num_lights; + } + /* Note: mask is either src or dst, so at this point src, dst and mask + * have num_lights lights. */ + for (i = 0; i < num_lights; ++i) if (mask->ff.light[i].Type != NINED3DLIGHT_INVALID) dst->ff.light[i] = src->ff.light[i]; @@ -446,7 +470,7 @@ nine_state_copy_common_all(struct nine_state *dst, /* Capture those bits of current device state that have been changed between * BeginStateBlock and EndStateBlock. */ -HRESULT WINAPI +HRESULT NINE_WINAPI NineStateBlock9_Capture( struct NineStateBlock9 *This ) { struct nine_state *dst = &This->state; @@ -476,7 +500,7 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This ) } /* Set state managed by this StateBlock as current device state. 
*/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineStateBlock9_Apply( struct NineStateBlock9 *This ) { struct nine_state *dst = &This->base.device->state; diff --git a/src/gallium/state_trackers/nine/stateblock9.h b/src/gallium/state_trackers/nine/stateblock9.h index a580c6a2302..ffcc2d99a53 100644 --- a/src/gallium/state_trackers/nine/stateblock9.h +++ b/src/gallium/state_trackers/nine/stateblock9.h @@ -62,10 +62,10 @@ NineStateBlock9_ctor( struct NineStateBlock9 *, void NineStateBlock9_dtor( struct NineStateBlock9 * ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineStateBlock9_Capture( struct NineStateBlock9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineStateBlock9_Apply( struct NineStateBlock9 *This ); #endif /* _NINE_STATEBLOCK9_H_ */ diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index f88b75c3dd7..4c4234bfe27 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -97,7 +97,8 @@ NineSurface9_ctor( struct NineSurface9 *This, This->base.info.target, This->base.info.nr_samples, This->base.info.bind, - FALSE); + FALSE, + pDesc->Pool == D3DPOOL_SCRATCH); if (pDesc->Usage & D3DUSAGE_RENDERTARGET) This->base.info.bind |= PIPE_BIND_RENDER_TARGET; @@ -116,13 +117,10 @@ NineSurface9_ctor( struct NineSurface9 *This, return E_OUTOFMEMORY; } - if (pDesc->Pool == D3DPOOL_SYSTEMMEM) { - This->base.info.usage = PIPE_USAGE_STAGING; - assert(!pResource); - } else { - if (pResource && (pDesc->Usage & D3DUSAGE_DYNAMIC)) - pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE; - } + assert(pDesc->Pool != D3DPOOL_SYSTEMMEM || !pResource); + + if (pResource && (pDesc->Usage & D3DUSAGE_DYNAMIC)) + pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE; hr = NineResource9_ctor(&This->base, pParams, pResource, FALSE, D3DRTYPE_SURFACE, pDesc->Pool, pDesc->Usage); @@ -227,7 +225,7 @@ NineSurface9_Dump( struct NineSurface9 *This ) } #endif /* DEBUG */ -HRESULT WINAPI +HRESULT NINE_WINAPI 
NineSurface9_GetContainer( struct NineSurface9 *This, REFIID riid, void **ppContainer ) @@ -260,7 +258,7 @@ NineSurface9_MarkContainerDirty( struct NineSurface9 *This ) } } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_GetDesc( struct NineSurface9 *This, D3DSURFACE_DESC *pDesc ) { @@ -315,7 +313,7 @@ NineSurface9_GetSystemMemPointer(struct NineSurface9 *This, int x, int y) return This->data + (y * This->stride + x_offset); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_LockRect( struct NineSurface9 *This, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, @@ -426,7 +424,7 @@ NineSurface9_LockRect( struct NineSurface9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_UnlockRect( struct NineSurface9 *This ) { DBG("This=%p lock_count=%u\n", This, This->lock_count); @@ -439,14 +437,14 @@ NineSurface9_UnlockRect( struct NineSurface9 *This ) return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_GetDC( struct NineSurface9 *This, HDC *phdc ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_ReleaseDC( struct NineSurface9 *This, HDC hdc ) { diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h index 76156ae699c..7e8f2d35267 100644 --- a/src/gallium/state_trackers/nine/surface9.h +++ b/src/gallium/state_trackers/nine/surface9.h @@ -151,29 +151,29 @@ NineSurface9_Dump( struct NineSurface9 *This ) { } /*** Direct3D public ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_GetContainer( struct NineSurface9 *This, REFIID riid, void **ppContainer ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_GetDesc( struct NineSurface9 *This, D3DSURFACE_DESC *pDesc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_LockRect( struct NineSurface9 *This, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_UnlockRect( struct NineSurface9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_GetDC( struct 
NineSurface9 *This, HDC *phdc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSurface9_ReleaseDC( struct NineSurface9 *This, HDC hdc ); diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index 82d4173fbb2..1ab339c459c 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -202,7 +202,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, pf = d3d9_to_pipe_format_checked(This->screen, pParams->BackBufferFormat, PIPE_TEXTURE_2D, pParams->MultiSampleType, - PIPE_BIND_RENDER_TARGET, FALSE); + PIPE_BIND_RENDER_TARGET, FALSE, FALSE); if (This->actx->linear_framebuffer || (pf != PIPE_FORMAT_B8G8R8X8_UNORM && @@ -304,7 +304,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, pParams->BackBufferFormat, PIPE_TEXTURE_2D, tmplt.nr_samples, - tmplt.bind, FALSE); + tmplt.bind, FALSE, FALSE); if (tmplt.format == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; resource = This->screen->resource_create(This->screen, &tmplt); @@ -360,7 +360,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, PIPE_TEXTURE_2D, tmplt.nr_samples, tmplt.bind, - FALSE); + FALSE, FALSE); if (tmplt.format == PIPE_FORMAT_NONE) { tmplt.bind &= ~PIPE_BIND_SAMPLER_VIEW; tmplt.format = d3d9_to_pipe_format_checked(This->screen, @@ -368,7 +368,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, PIPE_TEXTURE_2D, tmplt.nr_samples, tmplt.bind, - FALSE); + FALSE, FALSE); } if (tmplt.format == PIPE_FORMAT_NONE) @@ -764,7 +764,7 @@ bypass_rendering: return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_Present( struct NineSwapChain9 *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -858,7 +858,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, IDirect3DSurface9 *pDestSurface ) { @@ -911,7 +911,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, 
return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, @@ -929,7 +929,7 @@ NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This, D3DRASTER_STATUS *pRasterStatus ) { @@ -939,7 +939,7 @@ NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This, return ID3DPresent_GetRasterStatus(This->present, pRasterStatus); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This, D3DDISPLAYMODE *pMode ) { @@ -961,7 +961,7 @@ NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This, return hr; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ) { diff --git a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h index 4bd74f7b6ec..7714cd52cdd 100644 --- a/src/gallium/state_trackers/nine/swapchain9.h +++ b/src/gallium/state_trackers/nine/swapchain9.h @@ -109,7 +109,7 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pParams, D3DDISPLAYMODEEX *mode ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_Present( struct NineSwapChain9 *This, const RECT *pSourceRect, const RECT *pDestRect, @@ -117,25 +117,25 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, const RGNDATA *pDirtyRegion, DWORD dwFlags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, IDirect3DSurface9 *pDestSurface ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetBackBuffer( struct NineSwapChain9 *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetRasterStatus( struct NineSwapChain9 *This, D3DRASTER_STATUS *pRasterStatus ); -HRESULT WINAPI 
+HRESULT NINE_WINAPI NineSwapChain9_GetDisplayMode( struct NineSwapChain9 *This, D3DDISPLAYMODE *pMode ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters ); diff --git a/src/gallium/state_trackers/nine/swapchain9ex.c b/src/gallium/state_trackers/nine/swapchain9ex.c index bcf627f4c22..309402cbb7b 100644 --- a/src/gallium/state_trackers/nine/swapchain9ex.c +++ b/src/gallium/state_trackers/nine/swapchain9ex.c @@ -51,21 +51,21 @@ NineSwapChain9Ex_dtor( struct NineSwapChain9Ex *This ) NineSwapChain9_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This, UINT *pLastPresentCount ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This, D3DPRESENTSTATS *pPresentationStatistics ) { STUB(D3DERR_INVALIDCALL); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation ) diff --git a/src/gallium/state_trackers/nine/swapchain9ex.h b/src/gallium/state_trackers/nine/swapchain9ex.h index 075f8835222..a15d9d2c01e 100644 --- a/src/gallium/state_trackers/nine/swapchain9ex.h +++ b/src/gallium/state_trackers/nine/swapchain9ex.h @@ -45,15 +45,15 @@ NineSwapChain9Ex_new( struct NineDevice9 *pDevice, D3DDISPLAYMODEEX *mode, struct NineSwapChain9Ex **ppOut ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetLastPresentCount( struct NineSwapChain9Ex *This, UINT *pLastPresentCount ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetPresentStats( struct NineSwapChain9Ex *This, D3DPRESENTSTATS *pPresentationStatistics ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineSwapChain9Ex_GetDisplayModeEx( struct NineSwapChain9Ex *This, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation ); diff --git a/src/gallium/state_trackers/nine/texture9.c 
b/src/gallium/state_trackers/nine/texture9.c index ada08cea90a..3b88e07e976 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -48,7 +48,6 @@ NineTexture9_ctor( struct NineTexture9 *This, { struct pipe_screen *screen = pParams->device->screen; struct pipe_resource *info = &This->base.base.info; - struct pipe_resource *resource; enum pipe_format pf; unsigned *level_offsets; unsigned l; @@ -61,10 +60,23 @@ NineTexture9_ctor( struct NineTexture9 *This, nine_D3DUSAGE_to_str(Usage), d3dformat_to_string(Format), nine_D3DPOOL_to_str(Pool), pSharedHandle); + user_assert(Width && Height, D3DERR_INVALIDCALL); + + /* pSharedHandle: can be non-null for ex only. + * D3DPOOL_SYSTEMMEM: Levels must be 1 + * D3DPOOL_DEFAULT: no restriction for Levels + * Other Pools are forbidden. */ + user_assert(!pSharedHandle || pParams->device->ex, D3DERR_INVALIDCALL); + user_assert(!pSharedHandle || + (Pool == D3DPOOL_SYSTEMMEM && Levels == 1) || + Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); + user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) || - (Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL); + (Pool != D3DPOOL_SYSTEMMEM && Pool != D3DPOOL_SCRATCH && Levels <= 1), + D3DERR_INVALIDCALL); - /* TODO: implement buffer sharing (should work with cross process too) + /* TODO: implement pSharedHandle for D3DPOOL_DEFAULT (cross process + * buffer sharing). * * Gem names may have fit but they're depreciated and won't work on render-nodes. * One solution is to use shm buffers. We would use a /dev/shm file, fill the first @@ -77,9 +89,6 @@ NineTexture9_ctor( struct NineTexture9 *This, * invalid handle, that we would fail to import. Please note that we don't advertise * the flag indicating the support for that feature, but apps seem to not care. 
*/ - user_assert(!pSharedHandle || - Pool == D3DPOOL_SYSTEMMEM || - Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); if (pSharedHandle && Pool == D3DPOOL_DEFAULT) { if (!*pSharedHandle) { @@ -97,7 +106,9 @@ NineTexture9_ctor( struct NineTexture9 *This, Levels = 0; pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_2D, 0, - PIPE_BIND_SAMPLER_VIEW, FALSE); + PIPE_BIND_SAMPLER_VIEW, FALSE, + Pool == D3DPOOL_SCRATCH); + if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; @@ -140,12 +151,6 @@ NineTexture9_ctor( struct NineTexture9 *This, DBG("Application asked for Software Vertex Processing, " "but this is unimplemented\n"); - if (pSharedHandle) - info->bind |= PIPE_BIND_SHARED; - - if (Pool == D3DPOOL_SYSTEMMEM) - info->usage = PIPE_USAGE_STAGING; - if (pSharedHandle && *pSharedHandle) { /* Pool == D3DPOOL_SYSTEMMEM */ user_buffer = (void *)*pSharedHandle; level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1)); @@ -188,11 +193,6 @@ NineTexture9_ctor( struct NineTexture9 *This, sfdesc.MultiSampleType = D3DMULTISAMPLE_NONE; sfdesc.MultiSampleQuality = 0; - if (Pool == D3DPOOL_SYSTEMMEM) - resource = NULL; - else - resource = This->base.base.resource; - for (l = 0; l <= info->last_level; ++l) { sfdesc.Width = u_minify(Width, l); sfdesc.Height = u_minify(Height, l); @@ -202,7 +202,7 @@ NineTexture9_ctor( struct NineTexture9 *This, level_offsets[l] : NULL; hr = NineSurface9_new(This->base.base.base.device, NineUnknown(This), - resource, user_buffer_for_level, + This->base.base.resource, user_buffer_for_level, D3DRTYPE_TEXTURE, l, 0, &sfdesc, &This->surfaces[l]); if (FAILED(hr)) @@ -240,7 +240,7 @@ NineTexture9_dtor( struct NineTexture9 *This ) NineBaseTexture9_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_GetLevelDesc( struct NineTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ) @@ -254,7 +254,7 @@ NineTexture9_GetLevelDesc( struct NineTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT 
NINE_WINAPI NineTexture9_GetSurfaceLevel( struct NineTexture9 *This, UINT Level, IDirect3DSurface9 **ppSurfaceLevel ) @@ -269,7 +269,7 @@ NineTexture9_GetSurfaceLevel( struct NineTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_LockRect( struct NineTexture9 *This, UINT Level, D3DLOCKED_RECT *pLockedRect, @@ -287,7 +287,7 @@ NineTexture9_LockRect( struct NineTexture9 *This, pRect, Flags); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_UnlockRect( struct NineTexture9 *This, UINT Level ) { @@ -298,7 +298,7 @@ NineTexture9_UnlockRect( struct NineTexture9 *This, return NineSurface9_UnlockRect(This->surfaces[Level]); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_AddDirtyRect( struct NineTexture9 *This, const RECT *pDirtyRect ) { diff --git a/src/gallium/state_trackers/nine/texture9.h b/src/gallium/state_trackers/nine/texture9.h index 6f80be9ccde..3911f26d468 100644 --- a/src/gallium/state_trackers/nine/texture9.h +++ b/src/gallium/state_trackers/nine/texture9.h @@ -48,28 +48,28 @@ NineTexture9_new( struct NineDevice9 *pDevice, struct NineTexture9 **ppOut, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_GetLevelDesc( struct NineTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_GetSurfaceLevel( struct NineTexture9 *This, UINT Level, IDirect3DSurface9 **ppSurfaceLevel ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_LockRect( struct NineTexture9 *This, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_UnlockRect( struct NineTexture9 *This, UINT Level ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineTexture9_AddDirtyRect( struct NineTexture9 *This, const RECT *pDirtyRect ); diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c index 10311b428fe..7c00b2d17e2 100644 --- a/src/gallium/state_trackers/nine/vertexbuffer9.c +++ 
b/src/gallium/state_trackers/nine/vertexbuffer9.c @@ -68,7 +68,7 @@ NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This ) return NineBuffer9_GetResource(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, @@ -78,13 +78,13 @@ NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This, return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This ) { return NineBuffer9_Unlock(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This, D3DVERTEXBUFFER_DESC *pDesc ) { diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h index 859402b925b..6caa9bfe326 100644 --- a/src/gallium/state_trackers/nine/vertexbuffer9.h +++ b/src/gallium/state_trackers/nine/vertexbuffer9.h @@ -62,17 +62,17 @@ NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This ); /*** Direct3D public ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexBuffer9_GetDesc( struct NineVertexBuffer9 *This, D3DVERTEXBUFFER_DESC *pDesc ); diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c index 36c594b5be3..e478c102b32 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.c +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c @@ -233,7 +233,7 @@ NineVertexDeclaration9_dtor( struct NineVertexDeclaration9 *This ) NineUnknown_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This, 
D3DVERTEXELEMENT9 *pElement, UINT *pNumElements ) diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h index e39f259440f..9d3b1bdca88 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.h +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h @@ -73,7 +73,7 @@ NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This, void NineVertexDeclaration9_dtor( struct NineVertexDeclaration9 *This ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This, D3DVERTEXELEMENT9 *pElement, UINT *pNumElements ); diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index fdfb79a138e..7978ea8925b 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -123,7 +123,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) NineUnknown_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexShader9_GetFunction( struct NineVertexShader9 *This, void *pData, UINT *pSizeOfData ) diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h index 15c3f4ff041..c89d4a1ee24 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.h +++ b/src/gallium/state_trackers/nine/vertexshader9.h @@ -107,7 +107,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *, void NineVertexShader9_dtor( struct NineVertexShader9 * ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVertexShader9_GetFunction( struct NineVertexShader9 *This, void *pData, UINT *pSizeOfData ); diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index f6988923caa..7d54a7ac8cf 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -106,7 +106,8 @@ NineVolume9_ctor( struct NineVolume9 *This, pDesc->Format, 
This->info.target, This->info.nr_samples, - This->info.bind, FALSE); + This->info.bind, FALSE, + pDesc->Pool == D3DPOOL_SCRATCH); if (This->info.format == PIPE_FORMAT_NONE) return D3DERR_DRIVERINTERNALERROR; @@ -116,9 +117,6 @@ NineVolume9_ctor( struct NineVolume9 *This, This->layer_stride = util_format_get_2d_size(This->info.format, This->stride, pDesc->Height); - if (pDesc->Pool == D3DPOOL_SYSTEMMEM) - This->info.usage = PIPE_USAGE_STAGING; - if (!This->resource) { hr = NineVolume9_AllocateData(This); if (FAILED(hr)) @@ -143,7 +141,7 @@ NineVolume9_dtor( struct NineVolume9 *This ) NineUnknown_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetContainer( struct NineVolume9 *This, REFIID riid, void **ppContainer ) @@ -174,7 +172,7 @@ NineVolume9_MarkContainerDirty( struct NineVolume9 *This ) BASETEX_REGISTER_UPDATE(tex); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetDesc( struct NineVolume9 *This, D3DVOLUME_DESC *pDesc ) { @@ -214,7 +212,7 @@ NineVolume9_GetSystemMemPointer(struct NineVolume9 *This, int x, int y, int z) return This->data + (z * This->layer_stride + y * This->stride + x_offset); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_LockBox( struct NineVolume9 *This, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, @@ -308,7 +306,7 @@ NineVolume9_LockBox( struct NineVolume9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_UnlockBox( struct NineVolume9 *This ) { DBG("This=%p lock_count=%u\n", This, This->lock_count); @@ -443,7 +441,7 @@ NineVolume9_new( struct NineDevice9 *pDevice, /*** The boring stuff. TODO: Unify with Resource. 
***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_SetPrivateData( struct NineVolume9 *This, REFGUID refguid, const void *pData, @@ -490,7 +488,7 @@ NineVolume9_SetPrivateData( struct NineVolume9 *This, return D3DERR_DRIVERINTERNALERROR; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetPrivateData( struct NineVolume9 *This, REFGUID refguid, void *pData, @@ -517,7 +515,7 @@ NineVolume9_GetPrivateData( struct NineVolume9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_FreePrivateData( struct NineVolume9 *This, REFGUID refguid ) { diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h index 26ca8a32605..2eb19a5b9e8 100644 --- a/src/gallium/state_trackers/nine/volume9.h +++ b/src/gallium/state_trackers/nine/volume9.h @@ -96,39 +96,39 @@ NineVolume9_UploadSelf( struct NineVolume9 *This, /*** Direct3D public ***/ -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_SetPrivateData( struct NineVolume9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetPrivateData( struct NineVolume9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_FreePrivateData( struct NineVolume9 *This, REFGUID refguid ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetContainer( struct NineVolume9 *This, REFIID riid, void **ppContainer ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_GetDesc( struct NineVolume9 *This, D3DVOLUME_DESC *pDesc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_LockBox( struct NineVolume9 *This, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolume9_UnlockBox( struct NineVolume9 *This ); #endif /* _NINE_VOLUME9_H_ */ diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index e5b2b53148d..e70f3c12337 100644 --- 
a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -48,19 +48,24 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, This, pParams, Width, Height, Depth, Levels, Usage, Format, Pool, pSharedHandle); + user_assert(Width && Height && Depth, D3DERR_INVALIDCALL); + + /* user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); */ + user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */ + /* An IDirect3DVolume9 cannot be bound as a render target can it ? */ user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)), D3DERR_INVALIDCALL); user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL); - user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */ - pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_3D, 0, - PIPE_BIND_SAMPLER_VIEW, FALSE); + PIPE_BIND_SAMPLER_VIEW, FALSE, + Pool == D3DPOOL_SCRATCH); + if (pf == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; - /* We support ATI1 and ATI2 hacks only for 2D textures */ + /* We support ATI1 and ATI2 hacks only for 2D and Cube textures */ if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) return D3DERR_INVALIDCALL; @@ -145,7 +150,7 @@ NineVolumeTexture9_dtor( struct NineVolumeTexture9 *This ) NineBaseTexture9_dtor(&This->base); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, UINT Level, D3DVOLUME_DESC *pDesc ) @@ -157,7 +162,7 @@ NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, UINT Level, IDirect3DVolume9 **ppVolumeLevel ) @@ -170,7 +175,7 @@ NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, UINT Level, D3DLOCKED_BOX *pLockedVolume, @@ -186,7 +191,7 @@ 
NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, Flags); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This, UINT Level ) { @@ -197,7 +202,7 @@ NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This, return NineVolume9_UnlockBox(This->volumes[Level]); } -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, const D3DBOX *pDirtyBox ) { diff --git a/src/gallium/state_trackers/nine/volumetexture9.h b/src/gallium/state_trackers/nine/volumetexture9.h index b8f250ad72e..dfe564cfc18 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.h +++ b/src/gallium/state_trackers/nine/volumetexture9.h @@ -47,28 +47,28 @@ NineVolumeTexture9_new( struct NineDevice9 *pDevice, struct NineVolumeTexture9 **ppOut, HANDLE *pSharedHandle ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, UINT Level, D3DVOLUME_DESC *pDesc ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, UINT Level, IDirect3DVolume9 **ppVolumeLevel ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, UINT Level, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_UnlockBox( struct NineVolumeTexture9 *This, UINT Level ); -HRESULT WINAPI +HRESULT NINE_WINAPI NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, const D3DBOX *pDirtyBox ); diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index 5ce12abe227..288cf2ad629 100644 --- a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -424,8 +424,17 @@ static void launch_grid(struct context *ctx, const uint *block_layout, const void *input) { struct pipe_context *pipe = ctx->pipe; + struct pipe_grid_info info; + int i; + + for (i = 0; i < 3; i++) { + 
info.block[i] = block_layout[i]; + info.grid[i] = grid_layout[i]; + } + info.pc = pc; + info.input = input; - pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); + pipe->launch_grid(pipe, &info); } static void test_default_init(void *p, int s, int x, int y) diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index c44424f4f4a..598ffcbd069 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -122,6 +122,7 @@ nouveau_drm_screen_create(int fd) case 0xf0: case 0x100: case 0x110: + case 0x120: init = nvc0_screen_create; break; default: diff --git a/src/mapi/glapi/gen/EXT_texture_integer.xml b/src/mapi/glapi/gen/EXT_texture_integer.xml index 5f9ea2963d3..eb6b220ea6b 100644 --- a/src/mapi/glapi/gen/EXT_texture_integer.xml +++ b/src/mapi/glapi/gen/EXT_texture_integer.xml @@ -69,25 +69,25 @@ <param name="a" type="GLuint"/> </function> - <function name="TexParameterIivEXT" alias="TexParameterIiv"> + <function name="TexParameterIivEXT" alias="TexParameterIiv" es2="3.0"> <param name="target" type="GLenum"/> <param name="pname" type="GLenum"/> <param name="params" type="const GLint *"/> </function> - <function name="TexParameterIuivEXT" alias="TexParameterIuiv"> + <function name="TexParameterIuivEXT" alias="TexParameterIuiv" es2="3.0"> <param name="target" type="GLenum"/> <param name="pname" type="GLenum"/> <param name="params" type="const GLuint *"/> </function> - <function name="GetTexParameterIivEXT" alias="GetTexParameterIiv"> + <function name="GetTexParameterIivEXT" alias="GetTexParameterIiv" es2="3.0"> <param name="target" type="GLenum"/> <param name="pname" type="GLenum"/> <param name="params" type="GLint *"/> </function> - <function name="GetTexParameterIuivEXT" alias="GetTexParameterIuiv"> + <function name="GetTexParameterIuivEXT" alias="GetTexParameterIuiv" es2="3.0"> <param name="target" type="GLenum"/> <param 
name="pname" type="GLenum"/> <param name="params" type="GLuint *"/> diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 86df980304b..178f7c027bc 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -817,6 +817,36 @@ </function> </category> +<category name="GL_EXT_texture_border_clamp" number="182"> + + <!-- The *TexParameter* functions are added in EXT_texture_integer --> + + <function name="SamplerParameterIivEXT" es2="3.0" alias="SamplerParameterIiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLint *"/> + </function> + + <function name="SamplerParameterIuivEXT" es2="3.0" alias="SamplerParameterIuiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLuint *"/> + </function> + + <function name="GetSamplerParameterIivEXT" es2="3.0" alias="GetSamplerParameterIiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLint *"/> + </function> + + <function name="GetSamplerParameterIuivEXT" es2="3.0" alias="GetSamplerParameterIuiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLfloat *"/> + </function> + +</category> + <category name="GL_EXT_draw_elements_base_vertex" number="204"> <function name="DrawElementsBaseVertexEXT" alias="DrawElementsBaseVertex" @@ -982,5 +1012,61 @@ <param name="texture" type="GLuint"/> <param name="level" type="GLint"/> </function> - </category> +</category> + +<category name="GL_OES_texture_border_clamp" number="215"> + + <enum name="TEXTURE_BORDER_COLOR_OES" value ="0x1004"/> + <enum name="CLAMP_TO_BORDER_OES" value="0x812D"/> + + <function name="TexParameterIivOES" es2="3.0" alias="TexParameterIiv"> + <param name="target" type="GLenum"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLint *"/> + </function> 
+ + <function name="TexParameterIuivOES" es2="3.0" alias="TexParameterIuiv"> + <param name="target" type="GLenum"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLuint *"/> + </function> + + <function name="GetTexParameterIivOES" es2="3.0" alias="GetTexParameterIiv"> + <param name="target" type="GLenum"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLint *"/> + </function> + + <function name="GetTexParameterIuivOES" es2="3.0" alias="GetTexParameterIuiv"> + <param name="target" type="GLenum"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLuint *"/> + </function> + + <function name="SamplerParameterIivOES" es2="3.0" alias="SamplerParameterIiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLint *"/> + </function> + + <function name="SamplerParameterIuivOES" es2="3.0" alias="SamplerParameterIuiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="const GLuint *"/> + </function> + + <function name="GetSamplerParameterIivOES" es2="3.0" alias="GetSamplerParameterIiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLint *"/> + </function> + + <function name="GetSamplerParameterIuivOES" es2="3.0" alias="GetSamplerParameterIuiv"> + <param name="sampler" type="GLuint"/> + <param name="pname" type="GLenum"/> + <param name="params" type="GLfloat *"/> + </function> + +</category> + </OpenGLAPI> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 6669f295399..a6c12c64828 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -405,6 +405,7 @@ STATETRACKER_FILES = \ state_tracker/st_atom_depth.c \ state_tracker/st_atom_framebuffer.c \ state_tracker/st_atom.h \ + state_tracker/st_atom_image.c \ state_tracker/st_atom_msaa.c \ state_tracker/st_atom_pixeltransfer.c \ 
state_tracker/st_atom_rasterizer.c \ @@ -427,6 +428,8 @@ STATETRACKER_FILES = \ state_tracker/st_cb_bufferobjects.h \ state_tracker/st_cb_clear.c \ state_tracker/st_cb_clear.h \ + state_tracker/st_cb_compute.c \ + state_tracker/st_cb_compute.h \ state_tracker/st_cb_condrender.c \ state_tracker/st_cb_condrender.h \ state_tracker/st_cb_copyimage.c \ diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 6534d432e61..57c3f686b0c 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -203,7 +203,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer); } if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture, - dst_tex_image->InternalFormat)) + dst_internal_format)) goto cleanup; } diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c index 224a67fc6eb..c2111e5e209 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c @@ -1,6 +1,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/samplerobj.h" +#include "main/teximage.h" #include "main/texobj.h" #include "intel_context.h" diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 45f7a91295d..8f92fd7cfd2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -175,6 +175,7 @@ i965_FILES = \ gen6_blorp.cpp \ gen6_cc.c \ gen6_clip_state.c \ + gen6_constant_state.c \ gen6_depth_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index c7cb3944870..05fff91ed57 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -70,8 +70,13 @@ 
brw_blorp_blit_miptrees(struct brw_context *brw, * the destination buffer because we use the standard render path to render * to destination color buffers, and the standard render path is * fast-color-aware. + * Lossless compression is only introduced for gen9 onwards whereas + * blorp is not supported even for gen8. Therefore it should be impossible + * to end up here with single sampled compressed surfaces. */ - intel_miptree_resolve_color(brw, src_mt); + assert(!intel_miptree_is_lossless_compressed(brw, src_mt)); + assert(!intel_miptree_is_lossless_compressed(brw, dst_mt)); + intel_miptree_resolve_color(brw, src_mt, 0); intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer); intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer); diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index d9f181a1cbf..0d2aca5b78a 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -35,6 +35,84 @@ static void +prepare_indirect_gpgpu_walker(struct brw_context *brw) +{ + GLintptr indirect_offset = brw->compute.num_work_groups_offset; + drm_intel_bo *bo = brw->compute.num_work_groups_bo; + + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 0); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 4); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 8); + + if (brw->gen > 7) + return; + + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + BEGIN_BATCH(7); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); + OUT_BATCH(MI_PREDICATE_SRC0 + 4); + OUT_BATCH(0u); + OUT_BATCH(MI_PREDICATE_SRC1 + 0); + OUT_BATCH(0u); + OUT_BATCH(MI_PREDICATE_SRC1 + 4); + OUT_BATCH(0u); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + 
I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 0); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 4); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_z_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 8); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* predicate = !predicate; */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOADINV | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_FALSE); + ADVANCE_BATCH(); +} + +static void brw_emit_gpgpu_walker(struct brw_context *brw) { const struct brw_cs_prog_data *prog_data = brw->cs.prog_data; @@ -45,20 +123,10 @@ brw_emit_gpgpu_walker(struct brw_context *brw) if (brw->compute.num_work_groups_bo == NULL) { indirect_flag = 0; } else { - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - drm_intel_bo *bo = brw->compute.num_work_groups_bo; - - indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 0); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 4); - brw_load_register_mem(brw, 
GEN7_GPGPU_DISPATCHDIMZ, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 8); + indirect_flag = + GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE | + (brw->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0); + prepare_indirect_gpgpu_walker(brw); } const unsigned simd_size = prog_data->simd_size; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 44d2fe4d9e4..31b6b2a3641 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -208,7 +208,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) if (!tex_obj || !tex_obj->mt) continue; intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); + /* Sampling engine understands lossless compression and resolving + * those surfaces should be skipped for performance reasons. + */ + intel_miptree_resolve_color(brw, tex_obj->mt, + INTEL_MIPTREE_IGNORE_CCS_E); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); } @@ -223,7 +227,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) tex_obj = intel_texture_object(u->TexObj); if (tex_obj && tex_obj->mt) { - intel_miptree_resolve_color(brw, tex_obj->mt); + /* Access to images is implemented using indirect messages + * against data port. Normal render target write understands + * lossless compression but unfortunately the typed/untyped + * read/write interface doesn't. Therefore the compressed + * surfaces need to be resolved prior to accessing them. + */ + intel_miptree_resolve_color(brw, tex_obj->mt, 0); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); } } @@ -252,7 +262,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) _mesa_get_srgb_format_linear(mt->format) == mt->format) continue; - intel_miptree_resolve_color(brw, mt); + /* Lossless compression is not supported for SRGB formats, it + * should be impossible to get here with such surfaces. 
+ */ + assert(!intel_miptree_is_lossless_compressed(brw, mt)); + intel_miptree_resolve_color(brw, mt, 0); brw_render_cache_set_check_flush(brw, mt->bo); } } @@ -710,6 +724,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_thread ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; + ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; } /** @@ -1227,7 +1242,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw, if (rb == NULL || rb->mt == NULL) continue; if (rb->mt->num_samples <= 1) - intel_miptree_resolve_color(brw, rb->mt); + intel_miptree_resolve_color(brw, rb->mt, 0); else intel_renderbuffer_downsample(brw, rb); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8416a1d4708..8ef5afea149 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2729,6 +2729,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) +# define GEN9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6) # define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) @@ -2939,6 +2940,7 @@ enum brw_wm_barycentric_interp_mode { #define GPGPU_WALKER 0x7105 /* GEN7 DW0 */ # define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_GPGPU_PREDICATE_ENABLE (1 << 8) /* GEN8+ DW2 */ # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 05872255865..38a27da898c 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -384,20 
+384,19 @@ static const struct brw_device_info brw_device_info_bxt = { .gt = 1, .has_llc = false, - /* XXX: These are preliminary thread counts and URB sizes. */ .num_slices = 1, - .max_vs_threads = 56, - .max_hs_threads = 56, - .max_ds_threads = 56, - .max_gs_threads = 56, + .max_vs_threads = 112, + .max_hs_threads = 112, + .max_ds_threads = 112, + .max_gs_threads = 112, .max_wm_threads = 64 * 3, - .max_cs_threads = 28, + .max_cs_threads = 6 * 6, .urb = { - .size = 64, + .size = 192, .min_vs_entries = 34, - .max_vs_entries = 640, - .max_hs_entries = 80, - .max_ds_entries = 80, + .max_vs_entries = 704, + .max_hs_entries = 256, + .max_ds_entries = 416, .max_gs_entries = 256, } }; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 23e71fd9ba6..afa8a4e9eae 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -391,6 +391,15 @@ brw_try_draw_prims(struct gl_context *ctx, if (ctx->NewState) _mesa_update_state(ctx); + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. + */ + brw_validate_textures(brw); + /* Find the highest sampler unit used by each shader program. A bit-count * won't work since ARB programs use the texture unit number as the sampler * index. 
@@ -406,15 +415,6 @@ brw_try_draw_prims(struct gl_context *ctx, brw->vs.base.sampler_count = _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed); - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures(brw); - intel_prepare_render(brw); /* This workaround has to happen outside of brw_upload_render_state() diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bb22cfa5fab..81a83400ea0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3524,6 +3524,36 @@ fs_visitor::lower_integer_multiplication() return progress; } +bool +fs_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + static void setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, fs_reg *dst, fs_reg color, unsigned components) @@ -5134,7 +5164,7 @@ fs_visitor::optimize() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s%d-%s-00-start", + snprintf(filename, 64, "%s%d-%s-00-00-start", stage_abbrev, dispatch_width, 
nir->info.name); backend_shader::dump_instructions(filename); @@ -5187,6 +5217,13 @@ fs_visitor::optimize() OPT(opt_combine_constants); OPT(lower_integer_multiplication); + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagate); + OPT(dead_code_eliminate); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 89fbbfc90e4..f1a81c13ef9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -175,6 +175,7 @@ public: bool lower_load_payload(); bool lower_logical_sends(); bool lower_integer_multiplication(); + bool lower_minmax(); bool lower_simd_width(); bool opt_combine_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index dd3c383a17d..9a3cc3a50c2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -369,20 +369,14 @@ namespace brw { * * Generally useful to get the minimum or maximum of two values. 
*/ - void + instruction * emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index fd25307c587..9dbe13df514 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -87,7 +87,7 @@ public: void setup_initial_values(); void run(); - void dump_block_data() const; + void dump_block_data() const UNUSED; void *mem_ctx; cfg_t *cfg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 956cf53fd6c..8611b8dc443 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1015,28 +1015,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_fmin: case nir_op_imin: case nir_op_umin: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_L; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L); inst->saturate = instr->dest.saturate; break; case nir_op_fmax: case nir_op_imax: case nir_op_umax: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_GE; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], 
BRW_CONDITIONAL_GE); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE); inst->saturate = instr->dest.saturate; break; @@ -1318,7 +1304,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, static brw_reg_type get_image_base_type(const glsl_type *type) { - switch ((glsl_base_type)type->sampler_type) { + switch ((glsl_base_type)type->sampled_type) { case GLSL_TYPE_UINT: return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_INT: diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 9b0750026c4..4adffdd75fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -1143,7 +1143,7 @@ namespace brw { if (rsize && pred) set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0))); - return tmp; + return retype(tmp, src0.type); } } } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index b2b07e7e58e..488fa6c0c45 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -228,7 +228,9 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, unsigned int x_align, y_align; unsigned int x_scaledown, y_scaledown; - if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) { + /* Only single sampled surfaces need to (and actually can) be resolved. */ + if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || + intel_miptree_is_lossless_compressed(brw, irb->mt)) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): * @@ -872,7 +874,10 @@ brw_meta_resolve_color(struct brw_context *brw, * bits to let us select the type of resolve. For fast clear resolves, it * turns out we can use the same value as pre-SKL though. 
*/ - set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); + if (intel_miptree_is_lossless_compressed(brw, mt)) + set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL); + else + set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; get_resolve_rect(brw, mt, &rect); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f44ccd6e071..6b85eac77d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -76,6 +76,7 @@ extern const struct brw_tracked_state brw_tcs_samplers; extern const struct brw_tracked_state brw_tes_samplers; extern const struct brw_tracked_state brw_gs_samplers; extern const struct brw_tracked_state brw_cs_samplers; +extern const struct brw_tracked_state brw_cs_texture_surfaces; extern const struct brw_tracked_state brw_vs_ubo_surfaces; extern const struct brw_tracked_state brw_vs_abo_surfaces; extern const struct brw_tracked_state brw_vs_image_surfaces; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index cac06fa8918..c6aa1344270 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -387,8 +387,27 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) /* We need to make sure that the programs get regenerated, since * any offsets leftover in brw_context will no longer be valid. */ - brw->NewGLState |= ~0; - brw->ctx.NewDriverState |= ~0ull; + brw->NewGLState = ~0; + brw->ctx.NewDriverState = ~0ull; + brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; + + /* Also, NULL out any stale program pointers. 
*/ + brw->vs.prog_data = NULL; + brw->vs.base.prog_data = NULL; + brw->tcs.prog_data = NULL; + brw->tcs.base.prog_data = NULL; + brw->tes.prog_data = NULL; + brw->tes.base.prog_data = NULL; + brw->gs.prog_data = NULL; + brw->gs.base.prog_data = NULL; + brw->wm.prog_data = NULL; + brw->wm.base.prog_data = NULL; + brw->cs.prog_data = NULL; + brw->cs.base.prog_data = NULL; + intel_batchbuffer_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index ee75ca88549..a91d07446ce 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -280,7 +280,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, - &brw_texture_surfaces, + &brw_cs_texture_surfaces, &brw_cs_work_groups_surface, &brw_cs_samplers, &brw_cs_state, @@ -395,7 +395,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, - &brw_texture_surfaces, + &brw_cs_texture_surfaces, &brw_cs_work_groups_surface, &brw_cs_samplers, &brw_cs_state, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2e79de6a883..8136339332b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -26,6 +26,7 @@ #include "brw_cfg.h" #include "brw_vs.h" #include "brw_nir.h" +#include "brw_vec4_builder.h" #include "brw_vec4_live_variables.h" #include "brw_dead_control_flow.h" #include "program/prog_parameter.h" @@ -1555,11 +1556,6 @@ vec4_vs_visitor::setup_attributes(int payload_reg) } } - if (vs_prog_data->uses_drawid) { - attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; - nr_attributes++; - } - /* VertexID is stored by the VF as the last vertex element, but we * don't represent it with a flag in inputs_read, so we call it * VERT_ATTRIB_MAX. 
@@ -1570,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) nr_attributes++; } + if (vs_prog_data->uses_drawid) { + attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; + nr_attributes++; + } + lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); return payload_reg + vs_prog_data->nr_attributes; @@ -1624,6 +1625,36 @@ vec4_vs_visitor::setup_payload(void) this->first_non_payload_grf = reg; } +bool +vec4_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + const vec4_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + src_reg vec4_visitor::get_timestamp() { @@ -1861,7 +1892,7 @@ vec4_visitor::run() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s-%s-00-start", + snprintf(filename, 64, "%s-%s-00-00-start", stage_abbrev, nir->info.name); backend_shader::dump_instructions(filename); @@ -1896,6 +1927,13 @@ vec4_visitor::run() OPT(dead_code_eliminate); } + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + } + if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6d388b04fdb..7b86e1bc050 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -304,6 +304,8 @@ public: void resolve_ud_negate(src_reg *reg); + bool lower_minmax(); + 
src_reg get_timestamp(); void dump_instruction(backend_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h index be1427c7db7..3a8617e05ac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h @@ -66,7 +66,8 @@ namespace brw { * instruction passed as argument. */ vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) : - shader(shader), block(block), cursor(inst) + shader(shader), block(block), cursor(inst), + force_writemask_all(inst->force_writemask_all) { annotation.str = inst->annotation; annotation.ir = inst->ir; @@ -298,18 +299,14 @@ namespace brw { * * Generally useful to get the minimum or maximum of two values. */ - void + instruction * emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); + + return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3bb768f82dc..8418a3cdc01 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -679,18 +679,8 @@ vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { - vec4_instruction *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = 
BRW_PREDICATE_NORMAL; - } - + vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->conditional_mod = conditionalmod; return inst; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 5ab2f7f09df..f1b8fc4c84d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -872,16 +872,12 @@ brw_update_texture_surfaces(struct brw_context *brw) /* BRW_NEW_FRAGMENT_PROGRAM */ struct gl_program *fs = (struct gl_program *) brw->fragment_program; - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = (struct gl_program *) brw->compute_program; - /* _NEW_TEXTURE */ update_stage_texture_surfaces(brw, vs, &brw->vs.base, false); update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false); update_stage_texture_surfaces(brw, tes, &brw->tes.base, false); update_stage_texture_surfaces(brw, gs, &brw->gs.base, false); update_stage_texture_surfaces(brw, fs, &brw->wm.base, false); - update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); /* emit alternate set of surface state for gather. 
this * allows the surface format to be overriden for only the @@ -897,8 +893,6 @@ brw_update_texture_surfaces(struct brw_context *brw) update_stage_texture_surfaces(brw, gs, &brw->gs.base, true); if (fs && fs->UsesGather) update_stage_texture_surfaces(brw, fs, &brw->wm.base, true); - if (cs && cs->UsesGather) - update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; @@ -908,7 +902,6 @@ const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | - BRW_NEW_COMPUTE_PROGRAM | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | @@ -923,6 +916,37 @@ const struct brw_tracked_state brw_texture_surfaces = { .emit = brw_update_texture_surfaces, }; +static void +brw_update_cs_texture_surfaces(struct brw_context *brw) +{ + /* BRW_NEW_COMPUTE_PROGRAM */ + struct gl_program *cs = (struct gl_program *) brw->compute_program; + + /* _NEW_TEXTURE */ + update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); + + /* emit alternate set of surface state for gather. this + * allows the surface format to be overriden for only the + * gather4 messages. + */ + if (brw->gen < 8) { + if (cs && cs->UsesGather) + update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); + } + + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; +} + +const struct brw_tracked_state brw_cs_texture_surfaces = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_COMPUTE_PROGRAM, + }, + .emit = brw_update_cs_texture_surfaces, +}; + + void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_shader *shader, @@ -1340,6 +1364,11 @@ brw_upload_image_surfaces(struct brw_context *brw, } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + /* This may have changed the image metadata dependent on the context + * image unit state and passed to the program as uniforms, make sure + * that push and pull constants are reuploaded. 
+ */ + brw->NewGLState |= _NEW_PROGRAM_CONSTANTS; } } diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c new file mode 100644 index 00000000000..6c0c32b26f7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -0,0 +1,190 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "program/prog_parameter.h" + +void +gen7_upload_constant_state(struct brw_context *brw, + const struct brw_stage_state *stage_state, + bool active, unsigned opcode) +{ + uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; + + /* Disable if the shader stage is inactive or there are no push constants. */ + active = active && stage_state->push_const_size != 0; + + int dwords = brw->gen >= 8 ? 
11 : 7; + BEGIN_BATCH(dwords); + OUT_BATCH(opcode << 16 | (dwords - 2)); + + /* Workaround for SKL+ (we use option #2 until we have a need for more + * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_* + * + * The driver must ensure The following case does not occur without a flush + * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to + * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length + * not equal to zero committed. Possible ways to avoid this condition + * include: + * 1. always force buffer 3 to have a non zero read length + * 2. always force buffer 0 to a zero read length + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(stage_state->push_const_size); + } else { + OUT_BATCH(active ? stage_state->push_const_size : 0); + OUT_BATCH(0); + } + /* Pointer to the constant buffer. Covered by the set of state flags + * from gen6_prepare_wm_contants + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + /* XXX: When using buffers other than 0, you need to specify the + * graphics virtual address regardless of INSPM/debug bits + */ + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, + stage_state->push_const_offset); + OUT_BATCH(0); + OUT_BATCH(0); + } else if (brw->gen >= 8) { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(active ? 
(stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } + + ADVANCE_BATCH(); + + /* On SKL+ the new constants don't take effect until the next corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure + * that is sent + */ + if (brw->gen >= 9) + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; +} + +/** + * Creates a streamed BO containing the push constants for the VS or GS on + * gen6+. + * + * Push constants are constant values (such as GLSL uniforms) that are + * pre-loaded into a shader stage's register space at thread spawn time. + * + * Not all GLSL uniforms will be uploaded as push constants: The hardware has + * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be + * uploaded as push constants, while GL 4.4 requires at least 1024 components + * to be usable for the VS. Plus, currently we always use pull constants + * instead of push constants when doing variable-index array access. + * + * See brw_curbe.c for the equivalent gen4/5 code. + */ +void +gen6_upload_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type) +{ + struct gl_context *ctx = &brw->ctx; + + if (prog_data->nr_params == 0) { + stage_state->push_const_size = 0; + } else { + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? 
*/ + if (prog) + _mesa_load_state_parameters(ctx, prog->Parameters); + + gl_constant_value *param; + int i; + + param = brw_state_batch(brw, type, + prog_data->nr_params * sizeof(gl_constant_value), + 32, &stage_state->push_const_offset); + + STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); + + /* _NEW_PROGRAM_CONSTANTS + * + * Also _NEW_TRANSFORM -- we may reference clip planes other than as a + * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS + * wouldn't be set for them. + */ + for (i = 0; i < prog_data->nr_params; i++) { + param[i] = *prog_data->param[i]; + } + + if (0) { + fprintf(stderr, "%s constants:\n", + _mesa_shader_stage_to_string(stage_state->stage)); + for (i = 0; i < prog_data->nr_params; i++) { + if ((i & 7) == 0) + fprintf(stderr, "g%d: ", + prog_data->dispatch_grf_start_reg + i / 8); + fprintf(stderr, "%8f ", param[i].f); + if ((i & 7) == 7) + fprintf(stderr, "\n"); + } + if ((i & 7) != 0) + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } + + stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; + /* We can only push 32 registers of constants at a time. */ + + /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 32" + * + * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields must be less than or + * equal to the size of 64" + * + * The other shader stages all match the VS's limits. 
+ */ + assert(stage_state->push_const_size <= 32); + } +} diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 4bc0a8598d6..c18610ca38c 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -33,95 +33,6 @@ #include "program/prog_statevars.h" #include "intel_batchbuffer.h" -/** - * Creates a streamed BO containing the push constants for the VS or GS on - * gen6+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * Not all GLSL uniforms will be uploaded as push constants: The hardware has - * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be - * uploaded as push constants, while GL 4.4 requires at least 1024 components - * to be usable for the VS. Plus, currently we always use pull constants - * instead of push constants when doing variable-index array access. - * - * See brw_curbe.c for the equivalent gen4/5 code. - */ -void -gen6_upload_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state, - enum aub_state_struct_type type) -{ - struct gl_context *ctx = &brw->ctx; - - if (prog_data->nr_params == 0) { - stage_state->push_const_size = 0; - } else { - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set? 
*/ - if (prog) - _mesa_load_state_parameters(ctx, prog->Parameters); - - gl_constant_value *param; - unsigned i; - - param = brw_state_batch(brw, type, - prog_data->nr_params * sizeof(gl_constant_value), - 32, &stage_state->push_const_offset); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - /* _NEW_PROGRAM_CONSTANTS - * - * Also _NEW_TRANSFORM -- we may reference clip planes other than as a - * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS - * wouldn't be set for them. - */ - for (i = 0; i < prog_data->nr_params; i++) { - param[i] = *prog_data->param[i]; - } - - if (0) { - fprintf(stderr, "%s constants:\n", - _mesa_shader_stage_to_string(stage_state->stage)); - for (i = 0; i < prog_data->nr_params; i++) { - if ((i & 7) == 0) - fprintf(stderr, "g%d: ", - prog_data->dispatch_grf_start_reg + i / 8); - fprintf(stderr, "%8f ", param[i].f); - if ((i & 7) == 7) - fprintf(stderr, "\n"); - } - if ((i & 7) != 0) - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; - /* We can only push 32 registers of constants at a time. */ - - /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to - * 32" - * - * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields must be less than or - * equal to the size of 64" - * - * The other shader stages all match the VS's limits. 
- */ - assert(stage_state->push_const_size <= 32); - } -} - static void gen6_upload_vs_push_constants(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 0c1813f9048..c4babc24f43 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -298,7 +298,12 @@ static struct brw_l3_weights get_pipeline_state_l3_weights(const struct brw_context *brw) { const struct brw_stage_state *stage_states[] = { - &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base + [MESA_SHADER_VERTEX] = &brw->vs.base, + [MESA_SHADER_TESS_CTRL] = &brw->tcs.base, + [MESA_SHADER_TESS_EVAL] = &brw->tes.base, + [MESA_SHADER_GEOMETRY] = &brw->gs.base, + [MESA_SHADER_FRAGMENT] = &brw->wm.base, + [MESA_SHADER_COMPUTE] = &brw->cs.base }; bool needs_dc = false, needs_slm = false; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index a18dc697651..0daecdc9ed2 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -29,81 +29,6 @@ #include "program/prog_statevars.h" #include "intel_batchbuffer.h" - -void -gen7_upload_constant_state(struct brw_context *brw, - const struct brw_stage_state *stage_state, - bool active, unsigned opcode) -{ - uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; - - /* Disable if the shader stage is inactive or there are no push constants. */ - active = active && stage_state->push_const_size != 0; - - int dwords = brw->gen >= 8 ? 11 : 7; - BEGIN_BATCH(dwords); - OUT_BATCH(opcode << 16 | (dwords - 2)); - - /* Workaround for SKL+ (we use option #2 until we have a need for more - * constant buffers). 
This comes from the documentation for 3DSTATE_CONSTANT_* - * - * The driver must ensure The following case does not occur without a flush - * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to - * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length - * not equal to zero committed. Possible ways to avoid this condition - * include: - * 1. always force buffer 3 to have a non zero read length - * 2. always force buffer 0 to a zero read length - */ - if (brw->gen >= 9 && active) { - OUT_BATCH(0); - OUT_BATCH(stage_state->push_const_size); - } else { - OUT_BATCH(active ? stage_state->push_const_size : 0); - OUT_BATCH(0); - } - - /* Pointer to the constant buffer. Covered by the set of state flags - * from gen6_prepare_wm_contants - */ - if (brw->gen >= 9 && active) { - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - /* XXX: When using buffers other than 0, you need to specify the - * graphics virtual address regardless of INSPM/debug bits - */ - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, - stage_state->push_const_offset); - OUT_BATCH(0); - OUT_BATCH(0); - } else if (brw->gen>= 8) { - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } else { - OUT_BATCH(active ? 
(stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } - - ADVANCE_BATCH(); - - /* On SKL+ the new constants don't take effect until the next corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure - * that is sent - */ - if (brw->gen >= 9) - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - static void upload_vs_state(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index fc8f701a24c..0a52815b8a6 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -197,6 +197,28 @@ gen8_emit_fast_clear_color(struct brw_context *brw, surf[7] |= mt->fast_clear_color_value; } +static uint32_t +gen8_get_aux_mode(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) +{ + if (mt->mcs_mt == NULL) + return GEN8_SURFACE_AUX_MODE_NONE; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." + */ + if (brw->gen >= 9 || mt->num_samples == 1) + assert(mt->halign == 16); + + return GEN8_SURFACE_AUX_MODE_MCS; +} + static void gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -209,13 +231,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw, bool rw, bool for_gather) { const unsigned depth = max_layer - min_layer; - struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; + struct intel_mipmap_tree *aux_mt = mt->mcs_mt; uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); const uint32_t surf_type = translate_tex_target(target); + uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -229,20 +251,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw, * buffer should always have been resolved before it is used as a texture * so there is no need for it. */ - if (mt->mcs_mt && mt->num_samples > 1) { - aux_mt = mt->mcs_mt; - aux_mode = GEN8_SURFACE_AUX_MODE_MCS; - - /* - * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): - * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" - * - * From the hardware spec for GEN9: - * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN - * 16 must be used." - */ - if (brw->gen >= 9 || mt->num_samples == 1) - assert(mt->halign == 16); + if (mt->num_samples <= 1) { + aux_mt = NULL; + aux_mode = GEN8_SURFACE_AUX_MODE_NONE; } uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); @@ -418,8 +429,6 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; - struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; unsigned width = mt->logical_width0; unsigned height = mt->logical_height0; unsigned pitch = mt->pitch; @@ -472,21 +481,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, __func__, _mesa_get_format_name(rb_format)); } - if (mt->mcs_mt) { - aux_mt = mt->mcs_mt; - aux_mode = GEN8_SURFACE_AUX_MODE_MCS; - - /* - * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): - * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" - * - * From the hardware spec for GEN9: - * "When Auxiliary 
Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN - * 16 must be used." - */ - if (brw->gen >= 9 || mt->num_samples == 1) - assert(mt->halign == 16); - } + struct intel_mipmap_tree *aux_mt = mt->mcs_mt; + const uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type); uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 6d29fbdde21..72cf9af5b53 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -317,8 +317,8 @@ intel_miptree_blit(struct brw_context *brw, */ intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice); intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice); - intel_miptree_resolve_color(brw, src_mt); - intel_miptree_resolve_color(brw, dst_mt); + intel_miptree_resolve_color(brw, src_mt, 0); + intel_miptree_resolve_color(brw, dst_mt, 0); if (src_flip) src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height; diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index dbbac1c95a2..08b7623e63d 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -270,11 +270,11 @@ intel_copy_image_sub_data(struct gl_context *ctx, */ intel_miptree_all_slices_resolve_hiz(brw, src_mt); intel_miptree_all_slices_resolve_depth(brw, src_mt); - intel_miptree_resolve_color(brw, src_mt); + intel_miptree_resolve_color(brw, src_mt, 0); intel_miptree_all_slices_resolve_hiz(brw, dst_mt); intel_miptree_all_slices_resolve_depth(brw, dst_mt); - intel_miptree_resolve_color(brw, dst_mt); + intel_miptree_resolve_color(brw, dst_mt, 0); _mesa_get_format_block_size(src_mt->format, &bw, &bh); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 108dd87dd8b..6c233d84df9 100644 --- 
a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -161,8 +161,9 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, } } -static bool -intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) +bool +intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, + unsigned tiling) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p326): @@ -200,9 +201,9 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf * non-MSRTs only. */ -static bool +bool intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, - struct intel_mipmap_tree *mt) + const struct intel_mipmap_tree *mt) { /* MCS support does not exist prior to Gen7 */ if (brw->gen < 7) @@ -266,6 +267,32 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, return true; } +/* On Gen9 support for color buffer compression was extended to single + * sampled surfaces. This is a helper considering both auxiliary buffer + * type and number of samples telling if the given miptree represents + * the new single sampled case - also called lossless compression. + */ +bool +intel_miptree_is_lossless_compressed(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + /* Only available from Gen9 onwards. */ + if (brw->gen < 9) + return false; + + /* Compression always requires auxiliary buffer. */ + if (!mt->mcs_mt) + return false; + + /* Single sample compression is represented re-using msaa compression + * layout type: "Compressed Multisampled Surfaces". + */ + if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS) + return false; + + /* And finally distinguish between msaa and single sample case. 
*/ + return mt->num_samples <= 1; +} /** * Determine depth format corresponding to a depth+stencil format, @@ -609,22 +636,21 @@ intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment, return size; } -struct intel_mipmap_tree * -intel_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - uint32_t layout_flags) +static struct intel_mipmap_tree * +miptree_create(struct brw_context *brw, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint num_samples, + uint32_t layout_flags) { struct intel_mipmap_tree *mt; mesa_format tex_format = format; mesa_format etc_format = MESA_FORMAT_NONE; - GLuint total_width, total_height; uint32_t alloc_flags = 0; format = intel_lower_compressed_format(brw, format); @@ -645,21 +671,8 @@ intel_miptree_create(struct brw_context *brw, return NULL; } - total_width = mt->total_width; - total_height = mt->total_height; - - if (format == MESA_FORMAT_S_UINT8) { - /* Align to size of W tile, 64x64. */ - total_width = ALIGN(total_width, 64); - total_height = ALIGN(total_height, 64); - } - - bool y_or_x = false; - - if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) { - y_or_x = true; + if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) mt->tiling = I915_TILING_Y; - } if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) alloc_flags |= BO_ALLOC_FOR_RENDER; @@ -675,26 +688,61 @@ intel_miptree_create(struct brw_context *brw, mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree", size, alignment); } else { - mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - alloc_flags); + if (format == MESA_FORMAT_S_UINT8) { + /* Align to size of W tile, 64x64. 
*/ + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", + ALIGN(mt->total_width, 64), + ALIGN(mt->total_height, 64), + mt->cpp, &mt->tiling, &pitch, + alloc_flags); + } else { + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", + mt->total_width, mt->total_height, + mt->cpp, &mt->tiling, &pitch, + alloc_flags); + } } mt->pitch = pitch; + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create(struct brw_context *brw, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint num_samples, + uint32_t layout_flags) +{ + struct intel_mipmap_tree *mt = miptree_create( + brw, target, format, + first_level, last_level, + width0, height0, depth0, num_samples, + layout_flags); + /* If the BO is too large to fit in the aperture, we need to use the * BLT engine to support it. Prior to Sandybridge, the BLT paths can't * handle Y-tiling, so we need to fall back to X. */ - if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) { + if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size && + mt->tiling == I915_TILING_Y) { + unsigned long pitch = mt->pitch; + const uint32_t alloc_flags = + (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ? 
+ BO_ALLOC_FOR_RENDER : 0; perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", mt->total_width, mt->total_height); mt->tiling = I915_TILING_X; drm_intel_bo_unreference(mt->bo); mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, + mt->total_width, mt->total_height, mt->cpp, &mt->tiling, &pitch, alloc_flags); mt->pitch = pitch; } @@ -1402,6 +1450,27 @@ intel_miptree_copy_teximage(struct brw_context *brw, intel_obj->needs_validate = true; } +static void +intel_miptree_init_mcs(struct brw_context *brw, + struct intel_mipmap_tree *mt, + int init_value) +{ + /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: + * + * When MCS buffer is enabled and bound to MSRT, it is required that it + * is cleared prior to any rendering. + * + * Since we don't use the MCS buffer for any purpose other than rendering, + * it makes sense to just clear it immediately upon allocation. + * + * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. + */ + void *data = intel_miptree_map_raw(brw, mt->mcs_mt); + memset(data, init_value, mt->mcs_mt->total_height * mt->mcs_mt->pitch); + intel_miptree_unmap_raw(mt->mcs_mt); + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; +} + static bool intel_miptree_alloc_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -1447,31 +1516,18 @@ intel_miptree_alloc_mcs(struct brw_context *brw, */ const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y; - mt->mcs_mt = intel_miptree_create(brw, - mt->target, - format, - mt->first_level, - mt->last_level, - mt->logical_width0, - mt->logical_height0, - mt->logical_depth0, - 0 /* num_samples */, - mcs_flags); - - /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: - * - * When MCS buffer is enabled and bound to MSRT, it is required that it - * is cleared prior to any rendering. 
- * - * Since we don't use the MCS buffer for any purpose other than rendering, - * it makes sense to just clear it immediately upon allocation. - * - * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. - */ - void *data = intel_miptree_map_raw(brw, mt->mcs_mt); - memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch); - intel_miptree_unmap_raw(mt->mcs_mt); - mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + mt->mcs_mt = miptree_create(brw, + mt->target, + format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + 0 /* num_samples */, + mcs_flags); + + intel_miptree_init_mcs(brw, mt, 0xFF); return mt->mcs_mt; } @@ -1520,16 +1576,16 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, if (brw->gen >= 8) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; } - mt->mcs_mt = intel_miptree_create(brw, - mt->target, - format, - mt->first_level, - mt->last_level, - mcs_width, - mcs_height, - mt->logical_depth0, - 0 /* num_samples */, - layout_flags); + mt->mcs_mt = miptree_create(brw, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + 0 /* num_samples */, + layout_flags); return mt->mcs_mt; } @@ -1991,8 +2047,17 @@ intel_miptree_all_slices_resolve_depth(struct brw_context *brw, void intel_miptree_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt) + struct intel_mipmap_tree *mt, + int flags) { + /* From gen9 onwards there is new compression scheme for single sampled + * surfaces called "lossless compressed". These don't need to be always + * resolved. 
+ */ + if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && + intel_miptree_is_lossless_compressed(brw, mt)) + return; + switch (mt->fast_clear_state) { case INTEL_FAST_CLEAR_STATE_NO_MCS: case INTEL_FAST_CLEAR_STATE_RESOLVED: @@ -2001,8 +2066,10 @@ intel_miptree_resolve_color(struct brw_context *brw, case INTEL_FAST_CLEAR_STATE_UNRESOLVED: case INTEL_FAST_CLEAR_STATE_CLEAR: /* Fast color clear resolves only make sense for non-MSAA buffers. */ - if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || + intel_miptree_is_lossless_compressed(brw, mt)) { brw_meta_resolve_color(brw, mt); + } break; } } @@ -2029,7 +2096,7 @@ intel_miptree_make_shareable(struct brw_context *brw, assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE); if (mt->mcs_mt) { - intel_miptree_resolve_color(brw, mt); + intel_miptree_resolve_color(brw, mt, 0); intel_miptree_release(&mt->mcs_mt); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; } @@ -2137,7 +2204,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt) /* CPU accesses to color buffers don't understand fast color clears, so * resolve any pending fast color clears before we map. 
*/ - intel_miptree_resolve_color(brw, mt); + intel_miptree_resolve_color(brw, mt, 0); drm_intel_bo *bo = mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 64f73ea9ae5..7cdfb37ff41 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -667,6 +667,18 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); bool +intel_miptree_is_lossless_compressed(const struct brw_context *brw, + const struct intel_mipmap_tree *mt); + +bool +intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, + unsigned tiling); + +bool +intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, + const struct intel_mipmap_tree *mt); + +bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); @@ -884,9 +896,19 @@ intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt) mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED; } +/** + * Flag values telling color resolve pass which special types of buffers + * can be ignored. + * + * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample + * compression scheme since gen9) + */ +#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0) + void intel_miptree_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + int flags); void intel_miptree_make_shareable(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 699e48a297c..54a741395eb 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -257,7 +257,7 @@ do_blit_bitmap( struct gl_context *ctx, /* The blitter has no idea about fast color clears, so we need to resolve * the miptree before we do anything. 
*/ - intel_miptree_resolve_color(brw, irb->mt); + intel_miptree_resolve_color(brw, irb->mt, 0); /* Chop it all into chunks that can be digested by hardware: */ for (py = 0; py < height; py += DY) { diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 10d14623fe1..31030b1b4ea 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -155,7 +155,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, /* Since we are going to read raw data to the miptree, we need to resolve * any pending fast color clears before we start. */ - intel_miptree_resolve_color(brw, irb->mt); + intel_miptree_resolve_color(brw, irb->mt, 0); bo = irb->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 5d32a4ce650..e21c3ac543f 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -423,7 +423,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, /* Since we are going to write raw data to the miptree, we need to resolve * any pending fast color clears before we start. */ - intel_miptree_resolve_color(brw, image->mt); + intel_miptree_resolve_color(brw, image->mt, 0); bo = image->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 970ded1e66b..573f701acdd 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -140,7 +140,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, /* Since we are going to write raw data to the miptree, we need to resolve * any pending fast color clears before we start. 
*/ - intel_miptree_resolve_color(brw, image->mt); + intel_miptree_resolve_color(brw, image->mt, 0); bo = image->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 2cf9c13be33..3a811e10359 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -24,6 +24,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/samplerobj.h" +#include "main/teximage.h" #include "main/texobj.h" #include "brw_context.h" diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 08f13178f84..61f703667aa 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -766,7 +766,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate) _mesa_set_enable(ctx, GL_TEXTURE_2D, !!(unit->Enabled & TEXTURE_2D_BIT)); _mesa_set_enable(ctx, GL_TEXTURE_3D, !!(unit->Enabled & TEXTURE_3D_BIT)); if (ctx->Extensions.ARB_texture_cube_map) { - _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP_ARB, + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, !!(unit->Enabled & TEXTURE_CUBE_BIT)); } if (ctx->Extensions.NV_texture_rectangle) { @@ -837,7 +837,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate) /* don't restore state for unsupported targets to prevent * raising GL errors. 
*/ - if (obj->Target == GL_TEXTURE_CUBE_MAP_ARB && + if (obj->Target == GL_TEXTURE_CUBE_MAP && !ctx->Extensions.ARB_texture_cube_map) { continue; } diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index de1aba44c1b..9aec42508a7 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -3007,8 +3007,8 @@ set_atomic_buffer_binding(struct gl_context *ctx, _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj); if (bufObj == ctx->Shared->NullBufferObj) { - binding->Offset = -1; - binding->Size = -1; + binding->Offset = 0; + binding->Size = 0; } else { binding->Offset = offset; binding->Size = size; diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 83e238ae825..26dafd1b786 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -58,10 +58,7 @@ supported_buffer_bitmask(const struct gl_context *ctx, if (_mesa_is_user_fbo(fb)) { /* A user-created renderbuffer */ - GLuint i; - for (i = 0; i < ctx->Const.MaxColorAttachments; i++) { - mask |= (BUFFER_BIT_COLOR0 << i); - } + mask = ((1 << ctx->Const.MaxColorAttachments) - 1) << BUFFER_COLOR0; } else { /* A window system framebuffer */ @@ -159,6 +156,9 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) case GL_COLOR_ATTACHMENT7_EXT: return BUFFER_BIT_COLOR7; default: + /* not an error, but also not supported */ + if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31) + return 1 << BUFFER_COUNT; /* error */ return BAD_MASK; } @@ -171,7 +171,7 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) * renderbuffer (a BUFFER_* value). * return -1 for an invalid buffer. 
*/ -static GLint +static gl_buffer_index read_buffer_enum_to_index(GLenum buffer) { switch (buffer) { @@ -214,6 +214,9 @@ read_buffer_enum_to_index(GLenum buffer) case GL_COLOR_ATTACHMENT7_EXT: return BUFFER_COLOR7; default: + /* not an error, but also not supported */ + if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31) + return BUFFER_COUNT; /* error */ return -1; } @@ -221,7 +224,7 @@ read_buffer_enum_to_index(GLenum buffer) /** - * Called by glDrawBuffer(). + * Called by glDrawBuffer() and glNamedFramebufferDrawBuffer(). * Specify which renderbuffer(s) to draw into for the first color output. * <buffer> can name zero, one, two or four renderbuffers! * \sa _mesa_DrawBuffers @@ -242,9 +245,9 @@ read_buffer_enum_to_index(GLenum buffer) * * See the GL_EXT_framebuffer_object spec for more info. */ -void -_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller) +static void +draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield destMask; @@ -293,7 +296,7 @@ void GLAPIENTRY _mesa_DrawBuffer(GLenum buffer) { GET_CURRENT_CONTEXT(ctx); - _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); + draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); } @@ -312,22 +315,22 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) else fb = ctx->WinSysDrawBuffer; - _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); + draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); } /** - * Called by glDrawBuffersARB; specifies the destination color renderbuffers - * for N fragment program color outputs. + * Called by glDrawBuffersARB() and glNamedFramebufferDrawBuffers() to specify + * the destination color renderbuffers for N fragment program color outputs. * \sa _mesa_DrawBuffer * \param n number of outputs * \param buffers array [n] of renderbuffer names. 
Unlike glDrawBuffer, the * names cannot specify more than one buffer. For example, * GL_FRONT_AND_BACK is illegal. */ -void -_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, - GLsizei n, const GLenum *buffers, const char *caller) +static void +draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller) { GLuint output; GLbitfield usedBufferMask, supportedMask; @@ -502,7 +505,7 @@ void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) { GET_CURRENT_CONTEXT(ctx); - _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); + draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); } @@ -522,7 +525,7 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, else fb = ctx->WinSysDrawBuffer; - _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); + draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); } @@ -545,8 +548,8 @@ updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb) /** - * Helper function to set the GL_DRAW_BUFFER state in the context and - * current FBO. Called via glDrawBuffer(), glDrawBuffersARB() + * Helper function to set the GL_DRAW_BUFFER state for the given context and + * FBO. Called via glDrawBuffer(), glDrawBuffersARB() * * All error checking will have been done prior to calling this function * so nothing should go wrong at this point. @@ -662,14 +665,17 @@ _mesa_update_draw_buffers(struct gl_context *ctx) /** * Like \sa _mesa_drawbuffers(), this is a helper function for setting - * GL_READ_BUFFER state in the context and current FBO. + * GL_READ_BUFFER state for the given context and FBO. + * Note that all error checking should have been done before calling + * this function. * \param ctx the rendering context + * \param fb the framebuffer object to update * \param buffer GL_FRONT, GL_BACK, GL_COLOR_ATTACHMENT0, etc. 
* \param bufferIndex the numerical index corresponding to 'buffer' */ void _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, GLint bufferIndex) + GLenum buffer, gl_buffer_index bufferIndex) { if ((fb == ctx->ReadBuffer) && _mesa_is_winsys_fbo(fb)) { /* Only update the per-context READ_BUFFER state if we're bound to @@ -687,15 +693,16 @@ _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, /** - * Called by glReadBuffer to set the source renderbuffer for reading pixels. + * Called by glReadBuffer and glNamedFramebufferReadBuffer to set the source + * renderbuffer for reading pixels. * \param mode color buffer such as GL_FRONT, GL_BACK, etc. */ -void -_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller) +static void +read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield supportedMask; - GLint srcBuffer; + gl_buffer_index srcBuffer; FLUSH_VERTICES(ctx, 0); @@ -740,7 +747,7 @@ void GLAPIENTRY _mesa_ReadBuffer(GLenum buffer) { GET_CURRENT_CONTEXT(ctx); - _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); + read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); } @@ -759,5 +766,5 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) else fb = ctx->WinSysReadBuffer; - _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); + read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 5aa79fda54b..9df08154688 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -34,13 +34,11 @@ #include "glheader.h" +#include "mtypes.h" struct gl_context; struct gl_framebuffer; -extern void -_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller); extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); @@ -48,10 +46,6 @@ _mesa_DrawBuffer( GLenum mode ); 
extern void GLAPIENTRY _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf); -extern void -_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, - GLsizei n, const GLenum *buffers, const char *caller); - extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); @@ -66,16 +60,12 @@ _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, extern void _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, GLint bufferIndex); + GLenum buffer, gl_buffer_index bufferIndex); extern void _mesa_update_draw_buffers(struct gl_context *ctx); -extern void -_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller); - extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 3bfcc5c0e39..92f69ab9b59 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -325,18 +325,6 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) _mesa_update_state( ctx ); } - /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' - * of the OpenGL 4.5 spec states: - * - * "An INVALID_ENUM error is generated by ClearBufferiv and - * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL." 
- */ - if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)"); - return; - } - switch (buffer) { case GL_STENCIL: /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: @@ -386,26 +374,13 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) } } break; - case GL_DEPTH: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_STENCIL case above). + * "An INVALID_ENUM error is generated by ClearBufferiv and + * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL." 
*/ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferiv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)", _mesa_enum_to_string(buffer)); return; @@ -470,32 +445,13 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value) } } break; - case GL_DEPTH: - case GL_STENCIL: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." - * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Even though we could do something sensible for GL_STENCIL, page 263 - * (page 279 of the PDF) says: - * - * "Only ClearBufferiv should be used to clear stencil buffers." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_STENCIL case in _mesa_ClearBufferiv). + * "An INVALID_ENUM error is generated by ClearBufferuiv and + * ClearNamedFramebufferuiv if buffer is not COLOR." 
*/ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferuiv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)", _mesa_enum_to_string(buffer)); return; @@ -587,26 +543,13 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value) } } break; - case GL_STENCIL: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_DEPTH case above). + * "An INVALID_ENUM error is generated by ClearBufferfv and + * ClearNamedFramebufferfv if buffer is not COLOR or DEPTH." 
*/ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferfv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)", _mesa_enum_to_string(buffer)); return; diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c index 53e7a500f61..b71430f2b12 100644 --- a/src/mesa/main/compute.c +++ b/src/mesa/main/compute.c @@ -41,6 +41,9 @@ _mesa_DispatchCompute(GLuint num_groups_x, if (!_mesa_validate_DispatchCompute(ctx, num_groups)) return; + if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u) + return; + ctx->Driver.DispatchCompute(ctx, num_groups); } diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 2d53e2fa72f..820ae072da6 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -288,17 +288,6 @@ #define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0 /*@}*/ -/** For GL_ARB_compute_shader */ -/*@{*/ -#define MAX_COMPUTE_UNIFORM_BLOCKS 12 -#define MAX_COMPUTE_TEXTURE_IMAGE_UNITS 16 -#define MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 8 -#define MAX_COMPUTE_ATOMIC_COUNTERS 8 -#define MAX_COMPUTE_SHARED_MEMORY_SIZE 32768 -#define MAX_COMPUTE_UNIFORM_COMPONENTS 512 -#define MAX_COMPUTE_IMAGE_UNIFORMS 8 -/*@}*/ - /** For GL_ARB_pipeline_statistics_query */ #define MAX_PIPELINE_STATISTICS 11 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 9388a1ca51d..26eee28db4e 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1600,9 +1600,6 @@ _mesa_check_init_viewport(struct gl_context *ctx, GLuint width, GLuint height) static void handle_first_current(struct gl_context *ctx) { - GLenum buffer; - GLint bufferIndex; - if (ctx->Version == 0) { /* probably in the process of tearing down the context */ return; @@ -1617,6 +1614,8 @@ handle_first_current(struct gl_context *ctx) * For GLES it is always GL_BACK which has a magic interpretation */ if (!ctx->HasConfig && _mesa_is_desktop_gl(ctx)) { if (ctx->DrawBuffer != 
_mesa_get_incomplete_framebuffer()) { + GLenum buffer; + if (ctx->DrawBuffer->Visual.doubleBufferMode) buffer = GL_BACK; else @@ -1627,6 +1626,9 @@ handle_first_current(struct gl_context *ctx) } if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) { + gl_buffer_index bufferIndex; + GLenum buffer; + if (ctx->ReadBuffer->Visual.doubleBufferMode) { buffer = GL_BACK; bufferIndex = BUFFER_BACK_LEFT; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 19ef3042548..3f5aa5db051 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -35,6 +35,7 @@ #include "glheader.h" +struct gl_bitmap_atlas; struct gl_buffer_object; struct gl_context; struct gl_display_list; @@ -154,6 +155,14 @@ struct dd_function_table { GLint x, GLint y, GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ); + + /** + * Called by display list code for optimized glCallLists/glBitmap rendering + * The driver must support texture rectangles of width 1024 or more. + */ + void (*DrawAtlasBitmaps)(struct gl_context *ctx, + const struct gl_bitmap_atlas *atlas, + GLuint count, const GLubyte *ids); /*@}*/ diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 0e25efbae72..afd2d83cb59 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -72,6 +72,9 @@ #include "vbo/vbo.h" +#define USE_BITMAP_ATLAS 1 + + /** * Other parts of Mesa (such as the VBO module) can plug into the display @@ -606,6 +609,261 @@ void mesa_print_display_list(GLuint list); /** + * Does the given display list only contain a single glBitmap call? + */ +static bool +is_bitmap_list(const struct gl_display_list *dlist) +{ + const Node *n = dlist->Head; + if (n[0].opcode == OPCODE_BITMAP) { + n += InstSize[OPCODE_BITMAP]; + if (n[0].opcode == OPCODE_END_OF_LIST) + return true; + } + return false; +} + + +/** + * Is the given display list an empty list? 
+ */ +static bool +is_empty_list(const struct gl_display_list *dlist) +{ + const Node *n = dlist->Head; + return n[0].opcode == OPCODE_END_OF_LIST; +} + + +/** + * Delete/free a gl_bitmap_atlas. Called during context tear-down. + */ +void +_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas) +{ + if (atlas->texObj) { + ctx->Driver.DeleteTexture(ctx, atlas->texObj); + } + free(atlas->glyphs); +} + + +/** + * Lookup a gl_bitmap_atlas by listBase ID. + */ +static struct gl_bitmap_atlas * +lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase) +{ + struct gl_bitmap_atlas *atlas; + + assert(listBase > 0); + atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase); + return atlas; +} + + +/** + * Create new bitmap atlas and insert into hash table. + */ +static struct gl_bitmap_atlas * +alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase) +{ + struct gl_bitmap_atlas *atlas; + + assert(listBase > 0); + assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL); + + atlas = calloc(1, sizeof(*atlas)); + if (atlas) { + _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas); + } + + return atlas; +} + + +/** + * Try to build a bitmap atlas. This involves examining a sequence of + * display lists which contain glBitmap commands and putting the bitmap + * images into a texture map (the atlas). + * If we succeed, gl_bitmap_atlas::complete will be set to true. + * If we fail, gl_bitmap_atlas::incomplete will be set to true. 
+ */ +static void +build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas, + GLuint listBase) +{ + unsigned i, row_height = 0, xpos = 0, ypos = 0; + GLubyte *map; + GLint map_stride; + + assert(atlas); + assert(!atlas->complete); + assert(atlas->numBitmaps > 0); + + /* We use a rectangle texture (non-normalized coords) for the atlas */ + assert(ctx->Extensions.NV_texture_rectangle); + assert(ctx->Const.MaxTextureRectSize >= 1024); + + atlas->texWidth = 1024; + atlas->texHeight = 0; /* determined below */ + + atlas->glyphs = malloc(atlas->numBitmaps * sizeof(atlas->glyphs[0])); + if (!atlas->glyphs) { + /* give up */ + atlas->incomplete = true; + return; + } + + /* Loop over the display lists. They should all contain a single glBitmap + * call. If not, bail out. Also, compute the position and sizes of each + * bitmap in the atlas to determine the texture atlas size. + */ + for (i = 0; i < atlas->numBitmaps; i++) { + const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i); + const Node *n; + struct gl_bitmap_glyph *g = &atlas->glyphs[i]; + unsigned bitmap_width, bitmap_height; + float bitmap_xmove, bitmap_ymove, bitmap_xorig, bitmap_yorig; + + if (!list || is_empty_list(list)) { + /* stop here */ + atlas->numBitmaps = i; + break; + } + + if (!is_bitmap_list(list)) { + /* This list does not contain exactly one glBitmap command. Give up. 
*/ + atlas->incomplete = true; + return; + } + + /* get bitmap info from the display list command */ + n = list->Head; + assert(n[0].opcode == OPCODE_BITMAP); + bitmap_width = n[1].i; + bitmap_height = n[2].i; + bitmap_xorig = n[3].f; + bitmap_yorig = n[4].f; + bitmap_xmove = n[5].f; + bitmap_ymove = n[6].f; + + if (xpos + bitmap_width > atlas->texWidth) { + /* advance to the next row of the texture */ + xpos = 0; + ypos += row_height; + row_height = 0; + } + + /* save the bitmap's position in the atlas */ + g->x = xpos; + g->y = ypos; + g->w = bitmap_width; + g->h = bitmap_height; + g->xorig = bitmap_xorig; + g->yorig = bitmap_yorig; + g->xmove = bitmap_xmove; + g->ymove = bitmap_ymove; + + xpos += bitmap_width; + + /* keep track of tallest bitmap in the row */ + row_height = MAX2(row_height, bitmap_height); + } + + /* Now we know the texture height */ + atlas->texHeight = ypos + row_height; + + if (atlas->texHeight == 0) { + /* no glyphs found, give up */ + goto fail; + } + else if (atlas->texHeight > ctx->Const.MaxTextureRectSize) { + /* too large, give up */ + goto fail; + } + + /* Create atlas texture (texture ID is irrelevant) */ + atlas->texObj = ctx->Driver.NewTextureObject(ctx, 999, GL_TEXTURE_RECTANGLE); + if (!atlas->texObj) { + goto out_of_memory; + } + + atlas->texObj->Sampler.MinFilter = GL_NEAREST; + atlas->texObj->Sampler.MagFilter = GL_NEAREST; + atlas->texObj->MaxLevel = 0; + atlas->texObj->Immutable = GL_TRUE; + + atlas->texImage = _mesa_get_tex_image(ctx, atlas->texObj, + GL_TEXTURE_RECTANGLE, 0); + if (!atlas->texImage) { + goto out_of_memory; + } + + _mesa_init_teximage_fields(ctx, atlas->texImage, + atlas->texWidth, atlas->texHeight, 1, 0, + GL_ALPHA, MESA_FORMAT_A_UNORM8); + + /* alloc image storage */ + if (!ctx->Driver.AllocTextureImageBuffer(ctx, atlas->texImage)) { + goto out_of_memory; + } + + /* map teximage, load with bitmap glyphs */ + ctx->Driver.MapTextureImage(ctx, atlas->texImage, 0, + 0, 0, atlas->texWidth, atlas->texHeight, + 
GL_MAP_WRITE_BIT, &map, &map_stride); + if (!map) { + goto out_of_memory; + } + + /* Background/clear pixels are 0xff, foreground/set pixels are 0x0 */ + memset(map, 0xff, map_stride * atlas->texHeight); + + for (i = 0; i < atlas->numBitmaps; i++) { + const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i); + const Node *n = list->Head; + + assert(n[0].opcode == OPCODE_BITMAP || + n[0].opcode == OPCODE_END_OF_LIST); + + if (n[0].opcode == OPCODE_BITMAP) { + unsigned bitmap_width = n[1].i; + unsigned bitmap_height = n[2].i; + unsigned xpos = atlas->glyphs[i].x; + unsigned ypos = atlas->glyphs[i].y; + const void *bitmap_image = get_pointer(&n[7]); + + assert(atlas->glyphs[i].w == bitmap_width); + assert(atlas->glyphs[i].h == bitmap_height); + + /* put the bitmap image into the texture image */ + _mesa_expand_bitmap(bitmap_width, bitmap_height, + &ctx->DefaultPacking, bitmap_image, + map + map_stride * ypos + xpos, /* dest addr */ + map_stride, 0x0); + } + } + + ctx->Driver.UnmapTextureImage(ctx, atlas->texImage, 0); + + atlas->complete = true; + + return; + +out_of_memory: + _mesa_error(ctx, GL_OUT_OF_MEMORY, "Display list bitmap atlas"); +fail: + if (atlas->texObj) { + ctx->Driver.DeleteTexture(ctx, atlas->texObj); + } + free(atlas->glyphs); + atlas->glyphs = NULL; + atlas->incomplete = true; +} + + +/** * Allocate a gl_display_list object with an initial block of storage. * \param count how many display list nodes/tokens to allocate */ @@ -856,6 +1114,30 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist) /** + * Called by _mesa_HashWalk() to check if a display list which is being + * deleted belongs to a bitmap texture atlas. 
+ */ +static void +check_atlas_for_deleted_list(GLuint atlas_id, void *data, void *userData) +{ + struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data; + GLuint list_id = *((GLuint *) userData); /* the list being deleted */ + + /* See if the list_id falls in the range contained in this texture atlas */ + if (atlas->complete && + list_id >= atlas_id && + list_id < atlas_id + atlas->numBitmaps) { + /* Mark the atlas as incomplete so it doesn't get used. But don't + * delete it yet since we don't want to try to recreate it in the next + * glCallLists. + */ + atlas->complete = false; + atlas->incomplete = true; + } +} + + +/** * Destroy a display list and remove from hash table. * \param list - display list number */ @@ -871,6 +1153,16 @@ destroy_list(struct gl_context *ctx, GLuint list) if (!dlist) return; + if (is_bitmap_list(dlist)) { + /* If we're destroying a simple glBitmap display list, there's a + * chance that we're destroying a bitmap image that's in a texture + * atlas. Examine all atlases to see if that's the case. There's + * usually few (if any) atlases so this isn't expensive. + */ + _mesa_HashWalk(ctx->Shared->BitmapAtlas, + check_atlas_for_deleted_list, &list); + } + _mesa_delete_list(ctx, dlist); _mesa_HashRemove(ctx->Shared->DisplayList, list); } @@ -8895,6 +9187,18 @@ _mesa_DeleteLists(GLuint list, GLsizei range) _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteLists"); return; } + + if (range > 1) { + /* We may be deleting a set of bitmap lists. See if there's a + * bitmap atlas to free. 
+ */ + struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, list); + if (atlas) { + _mesa_delete_bitmap_atlas(ctx, atlas); + _mesa_HashRemove(ctx->Shared->BitmapAtlas, list); + } + } + for (i = list; i < list + range; i++) { destroy_list(ctx, i); } @@ -8936,6 +9240,24 @@ _mesa_GenLists(GLsizei range) } } + if (USE_BITMAP_ATLAS && + range > 16 && + ctx->Driver.DrawAtlasBitmaps) { + /* "range > 16" is a rough heuristic to guess when glGenLists might be + * used to allocate display lists for glXUseXFont or wglUseFontBitmaps. + * Create the empty atlas now. + */ + struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, base); + if (!atlas) { + atlas = alloc_bitmap_atlas(ctx, base); + } + if (atlas) { + /* Atlas _should_ be new/empty now, but clobbering is OK */ + assert(atlas->numBitmaps == 0); + atlas->numBitmaps = range; + } + } + mtx_unlock(&ctx->Shared->Mutex); return base; @@ -9085,6 +9407,65 @@ _mesa_CallList(GLuint list) /** + * Try to execute a glCallLists() command where the display lists contain + * glBitmap commands with a texture atlas. + * \return true for success, false otherwise + */ +static bool +render_bitmap_atlas(struct gl_context *ctx, GLsizei n, GLenum type, + const void *lists) +{ + struct gl_bitmap_atlas *atlas; + int i; + + if (!USE_BITMAP_ATLAS || + !ctx->Current.RasterPosValid || + ctx->List.ListBase == 0 || + type != GL_UNSIGNED_BYTE || + !ctx->Driver.DrawAtlasBitmaps) { + /* unsupported */ + return false; + } + + atlas = lookup_bitmap_atlas(ctx, ctx->List.ListBase); + + if (!atlas) { + /* Even if glGenLists wasn't called, we can still try to create + * the atlas now. + */ + atlas = alloc_bitmap_atlas(ctx, ctx->List.ListBase); + } + + if (atlas && !atlas->complete && !atlas->incomplete) { + /* Try to build the bitmap atlas now. + * If the atlas was created in glGenLists, we'll have recorded the + * number of lists (bitmaps). Otherwise, take a guess at 256. 
+ */ + if (atlas->numBitmaps == 0) + atlas->numBitmaps = 256; + build_bitmap_atlas(ctx, atlas, ctx->List.ListBase); + } + + if (!atlas || !atlas->complete) { + return false; + } + + /* check that all display list IDs are in the atlas */ + for (i = 0; i < n; i++) { + const GLubyte *ids = (const GLubyte *) lists; + + if (ids[i] >= atlas->numBitmaps) { + return false; + } + } + + ctx->Driver.DrawAtlasBitmaps(ctx, atlas, n, (const GLubyte *) lists); + + return true; +} + + +/** * Execute glCallLists: call multiple display lists. */ void GLAPIENTRY @@ -9123,6 +9504,10 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists) return; } + if (render_bitmap_atlas(ctx, n, type, lists)) { + return; + } + /* Save the CompileFlag status, turn it off, execute display list, * and restore the CompileFlag. */ diff --git a/src/mesa/main/dlist.h b/src/mesa/main/dlist.h index 7a23208ba5a..22b696f50c1 100644 --- a/src/mesa/main/dlist.h +++ b/src/mesa/main/dlist.h @@ -36,6 +36,44 @@ #include "main/mtypes.h" +/** + * Describes the location and size of a glBitmap image in a texture atlas. + */ +struct gl_bitmap_glyph +{ + unsigned short x, y, w, h; /**< position and size in the texture */ + float xorig, yorig; /**< bitmap origin */ + float xmove, ymove; /**< rasterpos move */ +}; + + +/** + * Describes a set of glBitmap display lists which live in a texture atlas. + * The idea is when we see a code sequence of glListBase(b), glCallLists(n) + * we're probably drawing bitmap font glyphs. We try to put all the bitmap + * glyphs into one texture map then render the glCallLists as a textured + * quadstrip. + */ +struct gl_bitmap_atlas +{ + bool complete; /**< Is the atlas ready to use? */ + bool incomplete; /**< Did we fail to construct this atlas? 
*/ + + unsigned numBitmaps; + unsigned texWidth, texHeight; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + + unsigned glyphHeight; + + struct gl_bitmap_glyph *glyphs; +}; + +void +_mesa_delete_bitmap_atlas(struct gl_context *ctx, + struct gl_bitmap_atlas *atlas); + + GLboolean GLAPIENTRY _mesa_IsList(GLuint list); diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 3fd3c2747ea..3985457f21a 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -749,7 +749,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) return; /* GL_ARB_texture_cube_map */ - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; CHECK_EXTENSION(ARB_texture_cube_map, cap); @@ -1450,7 +1450,7 @@ _mesa_IsEnabled( GLenum cap ) return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; /* GL_ARB_texture_cube_map */ - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: CHECK_EXTENSION(ARB_texture_cube_map); return is_texture_enabled(ctx, TEXTURE_CUBE_BIT); diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index d1e3a99fdc0..68f36178f32 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -224,6 +224,7 @@ EXT(EXT_subtexture , dummy_true EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) +EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) @@ -325,6 +326,7 @@ EXT(OES_point_sprite , ARB_point_sprite EXT(OES_query_matrix , dummy_true , x 
, x , ES1, x , 2003) EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003) EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005) +EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015) EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003) EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005) EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005) @@ -333,6 +335,7 @@ EXT(OES_stencil8 , dummy_true EXT(OES_stencil_wrap , dummy_true , x , x , ES1, x , 2002) EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012) EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005) +EXT(OES_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007) EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005) EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005) @@ -341,6 +344,7 @@ EXT(OES_texture_half_float , OES_texture_half_float EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005) EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005) EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005) +EXT(OES_texture_stencil8 , ARB_texture_stencil8 , x , x , x , 30, 2014) EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1b9b692f001..1f10050c891 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -811,7 +811,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format, break; } - baseFormat = _mesa_get_format_base_format(texImage->TexFormat); + baseFormat = texImage->_BaseFormat; if (format == GL_COLOR) { if (!_mesa_is_legal_color_format(ctx, baseFormat)) { @@ -868,8 +868,7 @@ 
test_attachment_completeness(const struct gl_context *ctx, GLenum format, } } else if (att->Type == GL_RENDERBUFFER_EXT) { - const GLenum baseFormat = - _mesa_get_format_base_format(att->Renderbuffer->Format); + const GLenum baseFormat = att->Renderbuffer->_BaseFormat; assert(att->Renderbuffer); if (!att->Renderbuffer->InternalFormat || diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 5fdabd5b97f..d16d69c3795 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -179,6 +179,63 @@ _mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map) } } + +/** + * Special case conversion function to swap r/b channels from the source + * image to the dest image. + */ +static void +convert_ubyte_rgba_to_bgra(size_t width, size_t height, + const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride) +{ + int row; + + if (sizeof(void *) == 8 && + src_stride % 8 == 0 && + dst_stride % 8 == 0 && + (GLsizeiptr) src % 8 == 0 && + (GLsizeiptr) dst % 8 == 0) { + /* use 64-bit word to swizzle two 32-bit pixels. We need 8-byte + * alignment for src/dst addresses and strides. 
+ */ + for (row = 0; row < height; row++) { + const GLuint64 *s = (const GLuint64 *) src; + GLuint64 *d = (GLuint64 *) dst; + int i; + for (i = 0; i < width/2; i++) { + d[i] = ( (s[i] & 0xff00ff00ff00ff00) | + ((s[i] & 0xff000000ff) << 16) | + ((s[i] & 0xff000000ff0000) >> 16)); + } + if (width & 1) { + /* handle the case of odd widths */ + const GLuint s = ((const GLuint *) src)[width - 1]; + GLuint *d = (GLuint *) dst + width - 1; + *d = ( (s & 0xff00ff00) | + ((s & 0xff) << 16) | + ((s & 0xff0000) >> 16)); + } + src += src_stride; + dst += dst_stride; + } + } else { + for (row = 0; row < height; row++) { + const GLuint *s = (const GLuint *) src; + GLuint *d = (GLuint *) dst; + int i; + for (i = 0; i < width; i++) { + d[i] = ( (s[i] & 0xff00ff00) | + ((s[i] & 0xff) << 16) | + ((s[i] & 0xff0000) >> 16)); + } + src += src_stride; + dst += dst_stride; + } + } +} + + /** * This can be used to convert between most color formats. * @@ -299,11 +356,18 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, return; } else if (src_array_format == RGBA8_UBYTE) { assert(!_mesa_is_format_integer_color(dst_format)); - for (row = 0; row < height; ++row) { - _mesa_pack_ubyte_rgba_row(dst_format, width, - (const uint8_t (*)[4])src, dst); - src += src_stride; - dst += dst_stride; + + if (dst_format == MESA_FORMAT_B8G8R8A8_UNORM) { + convert_ubyte_rgba_to_bgra(width, height, src, src_stride, + dst, dst_stride); + } + else { + for (row = 0; row < height; ++row) { + _mesa_pack_ubyte_rgba_row(dst_format, width, + (const uint8_t (*)[4])src, dst); + src += src_stride; + dst += dst_stride; + } } return; } else if (src_array_format == RGBA32_UINT && diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 85f7b6b5664..816f12bf9e2 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -131,11 +131,14 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, internalformat, buffer); break; case 
GL_NUM_SAMPLE_COUNTS: { - if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) { + if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) && + _mesa_is_enum_format_integer(internalformat)) { /* From GL ES 3.0 specification, section 6.1.15 page 236: "Since * multisampling is not supported for signed and unsigned integer * internal formats, the value of NUM_SAMPLE_COUNTS will be zero * for such formats. + * + * Such a restriction no longer exists in GL ES 3.1. */ buffer[0] = 0; count = 1; diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index 4ec8385ec2f..6c2d31dbcf3 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -123,7 +123,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx, GLuint face; for (face = 0; face < 6; face++) { ctx->Driver.GenerateMipmap(ctx, - GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face, texObj); + GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, texObj); } } else { diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 9005dc5897d..f40c5705813 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -642,7 +642,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_RECTANGLE_NV: case GL_TEXTURE_EXTERNAL_OES: v->value_bool = _mesa_IsEnabled(d->pname); diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 164095c103c..07d2d20df7a 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -455,13 +455,13 @@ descriptor=[ # GL_ARB_compute_shader / GLES 3.1 [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), 
extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONTEXT_INT(Const.MaxComputeSharedMemorySize), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms), extra_ARB_compute_shader_es31" ], [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ], [ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ], diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index f5284447b6e..987cd0db45c 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -3153,6 +3153,14 @@ 
_mesa_es3_error_check_format_and_type(const struct gl_context *ctx, } break; + case GL_STENCIL_INDEX: + if (!_mesa_has_OES_texture_stencil8(ctx) || + type != GL_UNSIGNED_BYTE || + internalFormat != GL_STENCIL_INDEX8) { + return GL_INVALID_OPERATION; + } + break; + case GL_ALPHA: case GL_LUMINANCE: case GL_LUMINANCE_ALPHA: diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 99f253cd373..4d6ab6f2b56 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -408,9 +408,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, const GLint srcStride = _mesa_image_row_stride(unpack, width, GL_COLOR_INDEX, GL_BITMAP); GLint row, col; - -#define SET_PIXEL(COL, ROW) \ - destBuffer[(ROW) * destStride + (COL)] = onValue; + GLubyte *dstRow = destBuffer; for (row = 0; row < height; row++) { const GLubyte *src = srcRow; @@ -421,7 +419,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, for (col = 0; col < width; col++) { if (*src & mask) { - SET_PIXEL(col, row); + dstRow[col] = onValue; } if (mask == 128U) { @@ -443,7 +441,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, for (col = 0; col < width; col++) { if (*src & mask) { - SET_PIXEL(col, row); + dstRow[col] = onValue; } if (mask == 1U) { @@ -461,9 +459,8 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, } srcRow += srcStride; + dstRow += destStride; } /* row */ - -#undef SET_PIXEL } diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 230ebbc67f4..14cd58870f7 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -89,7 +89,7 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment) if (err) return NULL; return mem; -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) return _aligned_malloc(bytes, alignment); #else uintptr_t ptr, buf; @@ -131,7 +131,7 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment) } return mem; -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) void *mem; mem = _aligned_malloc(bytes, alignment); @@ 
-178,7 +178,7 @@ _mesa_align_free(void *ptr) { #if defined(HAVE_POSIX_MEMALIGN) free(ptr); -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) _aligned_free(ptr); #else if (ptr) { @@ -196,7 +196,7 @@ void * _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, unsigned long alignment) { -#if defined(_WIN32) && defined(_MSC_VER) +#if defined(_WIN32) (void) oldSize; return _aligned_realloc(oldBuffer, newSize, alignment); #else diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index ad7af5c1d8c..d96d666e15f 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -83,9 +83,6 @@ typedef union { GLfloat f; GLint i; GLuint u; } fi_type; #if defined(_MSC_VER) -#if _MSC_VER < 1800 /* Not req'd on VS2013 and above */ -#define strtoll(p, e, b) _strtoi64(p, e, b) -#endif /* _MSC_VER < 1800 */ #define strcasecmp(s1, s2) _stricmp(s1, s2) #endif /*@}*/ diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 50469956c6e..5a02780b960 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1715,12 +1715,12 @@ _mesa_generate_mipmap_level(GLenum target, dstWidth, dstData[0]); break; case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: make_2d_mipmap(datatype, comps, border, srcWidth, srcHeight, srcData[0], srcRowStride, dstWidth, dstHeight, dstData[0], dstRowStride); @@ -1838,12 +1838,7 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx, for (face = 0; face < numFaces; face++) { struct gl_texture_image *dstImage; - GLenum target; - - if (numFaces == 1) 
- target = texObj->Target; - else - target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + const GLenum target = _mesa_cube_face_target(texObj->Target, face); dstImage = _mesa_get_tex_image(ctx, texObj, target, level); if (!dstImage) { @@ -2024,7 +2019,7 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target, /* only two types of compressed textures at this time */ assert(texObj->Target == GL_TEXTURE_2D || texObj->Target == GL_TEXTURE_2D_ARRAY || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARB || + texObj->Target == GL_TEXTURE_CUBE_MAP || texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY); /* diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a66b56c62bf..2ca9cbf808a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -44,6 +44,7 @@ #include "math/m_matrix.h" /* GLmatrix */ #include "compiler/shader_enums.h" #include "main/formats.h" /* MESA_FORMAT_COUNT */ +#include "compiler/glsl/list.h" #ifdef __cplusplus @@ -1872,6 +1873,8 @@ typedef enum PROGRAM_UNDEFINED, /**< Invalid/TBD value */ PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */ PROGRAM_BUFFER, /**< for shader buffers, compile-time only */ + PROGRAM_MEMORY, /**< for shared, global and local memory */ + PROGRAM_IMAGE, /**< for shader images, compile-time only */ PROGRAM_FILE_MAX } gl_register_file; @@ -2044,6 +2047,11 @@ struct gl_compute_program * Size specified using local_size_{x,y,z}. */ unsigned LocalSize[3]; + + /** + * Size of shared variables accessed by the compute shader. + */ + unsigned SharedSize; }; @@ -2769,6 +2777,13 @@ struct gl_shader_program struct gl_uniform_storage **UniformRemapTable; /** + * Sometimes there are empty slots left over in UniformRemapTable after we + * allocate slots to explicit locations. This list stores the blocks of + * continuous empty slots inside UniformRemapTable. + */ + struct exec_list EmptyUniformLocations; + + /** * Size of the gl_ClipDistance array that is output from the last pipeline * stage before the fragment shader. 
*/ @@ -3044,6 +3059,7 @@ struct gl_shared_state mtx_t Mutex; /**< for thread safety */ GLint RefCount; /**< Reference count */ struct _mesa_HashTable *DisplayList; /**< Display lists hash table */ + struct _mesa_HashTable *BitmapAtlas; /**< For optimized glBitmap text */ struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */ /** Default texture objects (shared by all texture units) */ @@ -3727,6 +3743,7 @@ struct gl_constants GLuint MaxComputeWorkGroupCount[3]; /* Array of x, y, z dimensions */ GLuint MaxComputeWorkGroupSize[3]; /* Array of x, y, z dimensions */ GLuint MaxComputeWorkGroupInvocations; + GLuint MaxComputeSharedMemorySize; /** GL_ARB_gpu_shader5 */ GLfloat MinFragmentInterpolationOffset; diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 0e4a0af0b0f..af17be2a3f4 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -964,8 +964,5 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, return; } - if (pipe->InfoLog) - _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog); - else - *length = 0; + _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog); } diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index 50659b0cd21..0d9f8aecf08 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -357,10 +357,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface, return; } - /* No need to write any properties, user requested none. 
*/ - if (bufSize == 0) - return; - _mesa_get_program_resourceiv(shProg, programInterface, index, propCount, props, bufSize, length, params); } diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 470182ab23d..882d863c1c7 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -582,7 +582,7 @@ read_rgba_pixels( struct gl_context *ctx, void *luminance; uint32_t luminance_format; - luminance_stride = width * sizeof(GL_FLOAT); + luminance_stride = width * sizeof(GLfloat); if (format == GL_LUMINANCE_ALPHA) luminance_stride *= 2; luminance_bytes = height * luminance_stride; diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index fe15508696e..ca366d967ab 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -1518,7 +1518,8 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, (_mesa_is_gles(ctx) ? + GL_INVALID_OPERATION : GL_INVALID_VALUE), "glGetSamplerParameterIiv(sampler %u)", sampler); return; @@ -1593,7 +1594,8 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, (_mesa_is_gles(ctx) ? 
+ GL_INVALID_OPERATION : GL_INVALID_VALUE), "glGetSamplerParameterIuiv(sampler %u)", sampler); return; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index df92c0912af..cdf15b48a0d 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2124,6 +2124,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, int i; for (i = 0; i < 3; i++) dst_cp->LocalSize[i] = src->Comp.LocalSize[i]; + dst_cp->SharedSize = src->Comp.SharedSize; break; } default: diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index b9f7bb65fb6..49e5f028045 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -65,6 +65,7 @@ _mesa_alloc_shared_state(struct gl_context *ctx) mtx_init(&shared->Mutex, mtx_plain); shared->DisplayList = _mesa_NewHashTable(); + shared->BitmapAtlas = _mesa_NewHashTable(); shared->TexObjects = _mesa_NewHashTable(); shared->Programs = _mesa_NewHashTable(); @@ -144,6 +145,18 @@ delete_displaylist_cb(GLuint id, void *data, void *userData) /** + * Callback for deleting a bitmap atlas. Called by _mesa_HashDeleteAll(). + */ +static void +delete_bitmap_atlas_cb(GLuint id, void *data, void *userData) +{ + struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data; + struct gl_context *ctx = (struct gl_context *) userData; + _mesa_delete_bitmap_atlas(ctx, atlas); +} + + +/** * Callback for deleting a texture object. Called by _mesa_HashDeleteAll(). 
*/ static void @@ -309,6 +322,8 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) */ _mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx); _mesa_DeleteHashTable(shared->DisplayList); + _mesa_HashDeleteAll(shared->BitmapAtlas, delete_bitmap_atlas_cb, ctx); + _mesa_DeleteHashTable(shared->BitmapAtlas); _mesa_HashWalk(shared->ShaderObjects, free_shader_program_data_cb, ctx); _mesa_HashDeleteAll(shared->ShaderObjects, delete_shader_cb, ctx); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index e6412962251..24e3d189091 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2436,6 +2436,16 @@ const struct function gles3_functions_possible[] = { { "glGetFragDataIndexEXT", 30, -1 }, { "glBindFragDataLocationEXT", 30, -1 }, + /* GL_OES_texture_border_clamp */ + { "glTexParameterIivOES", 30, -1 }, + { "glTexParameterIuivOES", 30, -1 }, + { "glGetTexParameterIivOES", 30, -1 }, + { "glGetTexParameterIuivOES", 30, -1 }, + { "glSamplerParameterIivOES", 30, -1 }, + { "glSamplerParameterIuivOES", 30, -1 }, + { "glGetSamplerParameterIivOES", 30, -1 }, + { "glGetSamplerParameterIuivOES", 30, -1 }, + { NULL, 0, -1 } }; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index b273aaac2a1..06bc8f1ba15 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -869,12 +869,12 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa) * the targets from table 8.19 (for GetTexImage and GetnTexImage *only*), * or TEXTURE_CUBE_MAP (for GetTextureImage *only*)." (Emphasis added.) 
*/ - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return dsa ? GL_FALSE : ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_CUBE_MAP: return dsa ? GL_TRUE : GL_FALSE; @@ -886,7 +886,7 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa) /** * Wrapper for _mesa_select_tex_image() which can handle target being - * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face. + * GL_TEXTURE_CUBE_MAP in which case we use zoffset to select a cube face. * This can happen for glGetTextureImage and glGetTextureSubImage (DSA * functions). */ diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 50141be8693..8a4c6286cbe 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -116,20 +116,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type) return format; } -/** - * For cube map faces, return a face index in [0,5]. 
- * For other targets return 0; - */ -GLuint -_mesa_tex_target_to_face(GLenum target) -{ - if (_mesa_is_cube_face(target)) - return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - else - return 0; -} - - /** * Install gl_texture_image in a gl_texture_object according to the target @@ -273,15 +259,15 @@ proxy_target(GLenum target) case GL_TEXTURE_3D: case GL_PROXY_TEXTURE_3D: return GL_PROXY_TEXTURE_3D; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_ARB: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: - return GL_PROXY_TEXTURE_CUBE_MAP_ARB; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_TEXTURE_CUBE_MAP: + case GL_PROXY_TEXTURE_CUBE_MAP: + return GL_PROXY_TEXTURE_CUBE_MAP; case GL_TEXTURE_RECTANGLE_NV: case GL_PROXY_TEXTURE_RECTANGLE_NV: return GL_PROXY_TEXTURE_RECTANGLE_NV; @@ -472,13 +458,13 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target) case GL_PROXY_TEXTURE_3D: return ctx->Const.Max3DTextureLevels; case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_PROXY_TEXTURE_CUBE_MAP: return 
ctx->Extensions.ARB_texture_cube_map ? ctx->Const.MaxCubeTextureLevels : 0; case GL_TEXTURE_RECTANGLE_NV: @@ -1016,7 +1002,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target, case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1); maxSize >>= level; if (width != height) @@ -2299,8 +2285,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, } if (baseFormat == GL_DEPTH_COMPONENT || baseFormat == GL_DEPTH_STENCIL || + baseFormat == GL_STENCIL_INDEX || rb_base_format == GL_DEPTH_COMPONENT || rb_base_format == GL_DEPTH_STENCIL || + rb_base_format == GL_STENCIL_INDEX || ((baseFormat == GL_LUMINANCE_ALPHA || baseFormat == GL_ALPHA) && rb_base_format != GL_RGBA) || diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h index 5df36c59a28..17f2c908ecc 100644 --- a/src/mesa/main/teximage.h +++ b/src/mesa/main/teximage.h @@ -43,10 +43,63 @@ extern "C" { static inline GLboolean _mesa_is_cube_face(GLenum target) { - return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB); + return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z); } + +/** + * Return number of faces for a texture target. This will be 6 for + * cube maps and 1 otherwise. + * NOTE: this function is not used for cube map arrays which operate + * more like 2D arrays than cube maps. + */ +static inline GLuint +_mesa_num_tex_faces(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP: + case GL_PROXY_TEXTURE_CUBE_MAP: + return 6; + default: + return 1; + } +} + + +/** + * If the target is GL_TEXTURE_CUBE_MAP, return one of the + * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z targets corresponding to + * the face parameter. + * Else, return target as-is. 
+ */ +static inline GLenum +_mesa_cube_face_target(GLenum target, unsigned face) +{ + if (target == GL_TEXTURE_CUBE_MAP) { + assert(face < 6); + return GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + } + else { + return target; + } +} + + +/** + * For cube map faces, return a face index in [0,5]. + * For other targets return 0; + */ +static inline GLuint +_mesa_tex_target_to_face(GLenum target) +{ + if (_mesa_is_cube_face(target)) + return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + else + return 0; +} + + /** Are any of the dimensions of given texture equal to zero? */ static inline GLboolean _mesa_is_zero_size_texture(const struct gl_texture_image *texImage) @@ -131,9 +184,6 @@ extern GLboolean _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, GLenum intFormat, GLenum *error); -extern GLuint -_mesa_tex_target_to_face(GLenum target); - extern GLint _mesa_get_texture_dimensions(GLenum target); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index e926c7b6cd2..d8407f04340 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -171,16 +171,16 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target) return texUnit->CurrentTex[TEXTURE_3D_INDEX]; case GL_PROXY_TEXTURE_3D: return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX]; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map ? 
texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL; - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map ? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL; case GL_TEXTURE_CUBE_MAP_ARRAY: @@ -239,7 +239,7 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target) * \param shared the shared GL state structure to contain the texture object * \param name integer name for the texture object * \param target either GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_3D, - * GL_TEXTURE_CUBE_MAP_ARB or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake + * GL_TEXTURE_CUBE_MAP or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake * of GenTextures() * * \return pointer to new texture object. @@ -270,7 +270,7 @@ _mesa_initialize_texture_object( struct gl_context *ctx, target == GL_TEXTURE_1D || target == GL_TEXTURE_2D || target == GL_TEXTURE_3D || - target == GL_TEXTURE_CUBE_MAP_ARB || + target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_RECTANGLE_NV || target == GL_TEXTURE_1D_ARRAY_EXT || target == GL_TEXTURE_2D_ARRAY_EXT || @@ -513,7 +513,7 @@ valid_texture_object(const struct gl_texture_object *tex) case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_RECTANGLE_NV: case GL_TEXTURE_1D_ARRAY_EXT: case GL_TEXTURE_2D_ARRAY_EXT: @@ -725,7 +725,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, case GL_TEXTURE_3D: maxLevels = ctx->Const.Max3DTextureLevels; break; - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP_ARRAY: maxLevels = ctx->Const.MaxCubeTextureLevels; break; @@ -768,7 +768,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, return; } - if (t->Target == GL_TEXTURE_CUBE_MAP_ARB) { + if (t->Target == GL_TEXTURE_CUBE_MAP) { /* Make sure that all six cube map level 0 images are the same size and * format. 
* Note: we know that the image's width==height (we enforce that @@ -1036,12 +1036,7 @@ _mesa_get_fallback_texture(struct gl_context *ctx, gl_texture_index tex) /* need a loop here just for cube maps */ for (face = 0; face < numFaces; face++) { - GLenum faceTarget; - - if (target == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; - else - faceTarget = target; + const GLenum faceTarget = _mesa_cube_face_target(target, face); /* initialize level[0] texture image */ texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, 0); diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index 8421337de4d..378d87a1cbb 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -120,25 +120,6 @@ _mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj) } -/** - * Return number of faces for a texture target. This will be 6 for - * cube maps (and cube map arrays) and 1 otherwise. - * NOTE: this function is not used for cube map arrays which operate - * more like 2D arrays than cube maps. - */ -static inline GLuint -_mesa_num_tex_faces(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP: - case GL_PROXY_TEXTURE_CUBE_MAP: - return 6; - default: - return 1; - } -} - - /** Is the texture "complete" with respect to the given sampler state? 
*/ static inline GLboolean _mesa_is_texture_complete(const struct gl_texture_object *texObj, diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 89f286cc05e..20770a77e15 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -72,7 +72,7 @@ validate_texture_wrap_mode(struct gl_context * ctx, GLenum target, GLenum wrap) break; case GL_CLAMP_TO_BORDER: - supported = is_desktop_gl && e->ARB_texture_border_clamp + supported = ctx->API != API_OPENGLES && e->ARB_texture_border_clamp && (target != GL_TEXTURE_EXTERNAL_OES); break; @@ -500,9 +500,7 @@ set_tex_parameteri(struct gl_context *ctx, goto invalid_pname; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if ((_mesa_is_desktop_gl(ctx) && - ctx->Extensions.ARB_stencil_texturing) || - _mesa_is_gles31(ctx)) { + if (_mesa_has_ARB_stencil_texturing(ctx) || _mesa_is_gles31(ctx)) { bool stencil = params[0] == GL_STENCIL_INDEX; if (!stencil && params[0] != GL_DEPTH_COMPONENT) goto invalid_param; @@ -719,7 +717,8 @@ set_tex_parameterf(struct gl_context *ctx, break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; if (!target_allows_setting_sampler_parameters(texObj->Target)) @@ -1215,12 +1214,12 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, return GL_TRUE; case GL_TEXTURE_2D_ARRAY_EXT: return ctx->Extensions.EXT_texture_array; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return ctx->Extensions.ARB_texture_cube_map; case 
GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: @@ -1237,7 +1236,7 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, case GL_PROXY_TEXTURE_2D: case GL_PROXY_TEXTURE_3D: return GL_TRUE; - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_CUBE_MAP_ARRAY_ARB: case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB: @@ -1312,6 +1311,7 @@ get_tex_level_parameter_image(struct gl_context *ctx, dummy_image.TexFormat = MESA_FORMAT_NONE; dummy_image.InternalFormat = GL_RGBA; dummy_image._BaseFormat = GL_NONE; + dummy_image.FixedSampleLocations = GL_TRUE; img = &dummy_image; } @@ -1736,7 +1736,8 @@ get_tex_parameterfv(struct gl_context *ctx, *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) @@ -1819,7 +1820,7 @@ get_tex_parameterfv(struct gl_context *ctx, *params = (GLfloat) obj->DepthMode; break; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing) + if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx)) goto invalid_pname; *params = (GLfloat) (obj->StencilSampling ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT); @@ -1970,7 +1971,8 @@ get_tex_parameteriv(struct gl_context *ctx, *params = (GLint) obj->Sampler.WrapR; break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; { @@ -2054,7 +2056,7 @@ get_tex_parameteriv(struct gl_context *ctx, *params = (GLint) obj->DepthMode; break; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing) + if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx)) goto invalid_pname; *params = (GLint) (obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT); diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 9d88554d945..9ee5c6974df 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -786,7 +786,7 @@ alloc_proxy_textures( struct gl_context *ctx ) GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_1D_ARRAY_EXT, GL_TEXTURE_EXTERNAL_OES, - GL_TEXTURE_CUBE_MAP_ARB, + GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_2D, diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index d7671738b18..c33b1095900 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -87,9 +87,6 @@ enum { * Texture image storage function. 
*/ typedef GLboolean (*StoreTexImageFunc)(TEXSTORE_PARAMS); -static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE }; -static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE }; -static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE }; /** diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 04b7d73da5c..316d8280338 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -211,10 +211,7 @@ initialize_texture_fields(struct gl_context *ctx, for (level = 0; level < levels; level++) { for (face = 0; face < numFaces; face++) { struct gl_texture_image *texImage; - GLenum faceTarget = target; - - if (target == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + const GLenum faceTarget = _mesa_cube_face_target(target, face); texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, level); @@ -536,9 +533,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, newViewNumLevels = MIN2(numlevels, origTexObj->NumLevels - minlevel); newViewNumLayers = MIN2(numlayers, origTexObj->NumLayers - minlayer); - faceTarget = origTexObj->Target; - if (faceTarget == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + minlayer; + faceTarget = _mesa_cube_face_target(origTexObj->Target, minlayer); /* Get a reference to what will become this View's base level */ origTexImage = _mesa_select_tex_image(origTexObj, faceTarget, minlevel); diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index eed241271df..db53377d705 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -40,7 +40,7 @@ #include "prog_statevars.h" #include "prog_parameter.h" #include "main/samplerobj.h" -#include "framebuffer.h" +#include "main/framebuffer.h" #define ONE_DIV_SQRT_LN2 (1.201122408786449815) @@ -353,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], } return; case STATE_NUM_SAMPLES: - ((int 
*)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer); + ((int *)value)[0] = MAX2(1, _mesa_geometric_samples(ctx->DrawBuffer)); return; case STATE_DEPTH_RANGE: value[0] = ctx->ViewportArray[0].Near; /* near */ diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 4b89ade1b15..622621bdcbb 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -38,9 +38,9 @@ /** - * This is used to initialize st->atoms[]. + * This is used to initialize st->render_atoms[]. */ -static const struct st_tracked_state *atoms[] = +static const struct st_tracked_state *render_atoms[] = { &st_update_depth_stencil_alpha, &st_update_clip, @@ -85,6 +85,11 @@ static const struct st_tracked_state *atoms[] = &st_bind_tes_ssbos, &st_bind_fs_ssbos, &st_bind_gs_ssbos, + &st_bind_vs_images, + &st_bind_tcs_images, + &st_bind_tes_images, + &st_bind_gs_images, + &st_bind_fs_images, &st_update_pixel_transfer, &st_update_tess, @@ -93,6 +98,22 @@ static const struct st_tracked_state *atoms[] = }; +/** + * This is used to initialize st->compute_atoms[]. + */ +static const struct st_tracked_state *compute_atoms[] = +{ + &st_update_cp, + &st_update_compute_texture, + &st_update_sampler, /* depends on update_compute_texture for swizzle */ + &st_update_cs_constants, + &st_bind_cs_ubos, + &st_bind_cs_atomics, + &st_bind_cs_ssbos, + &st_bind_cs_images, +}; + + void st_init_atoms( struct st_context *st ) { /* no-op */ @@ -178,20 +199,41 @@ static void check_attrib_edgeflag(struct st_context *st) * Update all derived state: */ -void st_validate_state( struct st_context *st ) +void st_validate_state( struct st_context *st, enum st_pipeline pipeline ) { - struct st_state_flags *state = &st->dirty; + const struct st_tracked_state **atoms; + struct st_state_flags *state; + GLuint num_atoms; GLuint i; + /* Get pipeline state. 
*/ + switch (pipeline) { + case ST_PIPELINE_RENDER: + atoms = render_atoms; + num_atoms = ARRAY_SIZE(render_atoms); + state = &st->dirty; + break; + case ST_PIPELINE_COMPUTE: + atoms = compute_atoms; + num_atoms = ARRAY_SIZE(compute_atoms); + state = &st->dirty_cp; + break; + default: + unreachable("Invalid pipeline specified"); + } + /* Get Mesa driver state. */ st->dirty.st |= st->ctx->NewDriverState; + st->dirty_cp.st |= st->ctx->NewDriverState; st->ctx->NewDriverState = 0; - check_attrib_edgeflag(st); + if (pipeline == ST_PIPELINE_RENDER) { + check_attrib_edgeflag(st); - check_program_state( st ); + check_program_state(st); - st_manager_validate_framebuffers(st); + st_manager_validate_framebuffers(st); + } if (state->st == 0 && state->mesa == 0) return; @@ -211,7 +253,7 @@ void st_validate_state( struct st_context *st ) memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < ARRAY_SIZE(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct st_tracked_state *atom = atoms[i]; struct st_state_flags generated; @@ -242,7 +284,7 @@ void st_validate_state( struct st_context *st ) } else { - for (i = 0; i < ARRAY_SIZE(atoms); i++) { + for (i = 0; i < num_atoms; i++) { if (check_state(state, &atoms[i]->dirty)) atoms[i]->update( st ); } diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index 3a9153c80cb..31bb2dd98bf 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -36,6 +36,9 @@ #include "main/glheader.h" +#include "state_tracker/st_api.h" +#include "state_tracker/st_context.h" + struct st_context; struct st_tracked_state; @@ -43,7 +46,7 @@ void st_init_atoms( struct st_context *st ); void st_destroy_atoms( struct st_context *st ); -void st_validate_state( struct st_context *st ); +void st_validate_state( struct st_context *st, enum st_pipeline pipeline ); extern const struct st_tracked_state st_update_array; @@ -55,6 +58,7 @@ extern const struct st_tracked_state st_update_gp; 
extern const struct st_tracked_state st_update_tep; extern const struct st_tracked_state st_update_tcp; extern const struct st_tracked_state st_update_vp; +extern const struct st_tracked_state st_update_cp; extern const struct st_tracked_state st_update_rasterizer; extern const struct st_tracked_state st_update_polygon_stipple; extern const struct st_tracked_state st_update_viewport; @@ -68,26 +72,37 @@ extern const struct st_tracked_state st_update_vertex_texture; extern const struct st_tracked_state st_update_geometry_texture; extern const struct st_tracked_state st_update_tessctrl_texture; extern const struct st_tracked_state st_update_tesseval_texture; +extern const struct st_tracked_state st_update_compute_texture; extern const struct st_tracked_state st_update_fs_constants; extern const struct st_tracked_state st_update_gs_constants; extern const struct st_tracked_state st_update_tes_constants; extern const struct st_tracked_state st_update_tcs_constants; extern const struct st_tracked_state st_update_vs_constants; +extern const struct st_tracked_state st_update_cs_constants; extern const struct st_tracked_state st_bind_fs_ubos; extern const struct st_tracked_state st_bind_vs_ubos; extern const struct st_tracked_state st_bind_gs_ubos; extern const struct st_tracked_state st_bind_tcs_ubos; extern const struct st_tracked_state st_bind_tes_ubos; +extern const struct st_tracked_state st_bind_cs_ubos; extern const struct st_tracked_state st_bind_fs_atomics; extern const struct st_tracked_state st_bind_vs_atomics; extern const struct st_tracked_state st_bind_gs_atomics; extern const struct st_tracked_state st_bind_tcs_atomics; extern const struct st_tracked_state st_bind_tes_atomics; +extern const struct st_tracked_state st_bind_cs_atomics; extern const struct st_tracked_state st_bind_fs_ssbos; extern const struct st_tracked_state st_bind_vs_ssbos; extern const struct st_tracked_state st_bind_gs_ssbos; extern const struct st_tracked_state st_bind_tcs_ssbos; extern 
const struct st_tracked_state st_bind_tes_ssbos; +extern const struct st_tracked_state st_bind_cs_ssbos; +extern const struct st_tracked_state st_bind_fs_images; +extern const struct st_tracked_state st_bind_vs_images; +extern const struct st_tracked_state st_bind_gs_images; +extern const struct st_tracked_state st_bind_tcs_images; +extern const struct st_tracked_state st_bind_tes_images; +extern const struct st_tracked_state st_bind_cs_images; extern const struct st_tracked_state st_update_pixel_transfer; extern const struct st_tracked_state st_update_tess; diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c index 1c30d1fb701..a27dbe0460d 100644 --- a/src/mesa/state_tracker/st_atom_atomicbuf.c +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c @@ -58,9 +58,11 @@ st_bind_atomics(struct st_context *st, st_buffer_object(binding->BufferObject); struct pipe_shader_buffer sb = { 0 }; - sb.buffer = st_obj->buffer; - sb.buffer_offset = binding->Offset; - sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + if (st_obj && st_obj->buffer) { + sb.buffer = st_obj->buffer; + sb.buffer_offset = binding->Offset; + sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + } st->pipe->set_shader_buffers(st->pipe, shader_type, atomic->Binding, 1, &sb); @@ -156,3 +158,21 @@ const struct st_tracked_state st_bind_tes_atomics = { }, bind_tes_atomics }; + +static void +bind_cs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_atomics = { + "st_bind_cs_atomics", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_cs_atomics +}; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 66811d29c29..407dfd31c80 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ 
b/src/mesa/state_tracker/st_atom_constbuf.c @@ -61,7 +61,8 @@ void st_upload_constants( struct st_context *st, shader_type == PIPE_SHADER_FRAGMENT || shader_type == PIPE_SHADER_GEOMETRY || shader_type == PIPE_SHADER_TESS_CTRL || - shader_type == PIPE_SHADER_TESS_EVAL); + shader_type == PIPE_SHADER_TESS_EVAL || + shader_type == PIPE_SHADER_COMPUTE); /* update constants */ if (params && params->NumParameters) { @@ -226,6 +227,28 @@ const struct st_tracked_state st_update_tes_constants = { update_tes_constants /* update */ }; +/* Compute shader: + */ +static void update_cs_constants(struct st_context *st ) +{ + struct st_compute_program *cp = st->cp; + struct gl_program_parameter_list *params; + + if (cp) { + params = cp->Base.Base.Parameters; + st_upload_constants( st, params, PIPE_SHADER_COMPUTE ); + } +} + +const struct st_tracked_state st_update_cs_constants = { + "st_update_cs_constants", /* name */ + { /* dirty */ + _NEW_PROGRAM_CONSTANTS, /* mesa */ + ST_NEW_COMPUTE_PROGRAM, /* st */ + }, + update_cs_constants /* update */ +}; + static void st_bind_ubos(struct st_context *st, struct gl_shader *shader, unsigned shader_type) @@ -363,3 +386,24 @@ const struct st_tracked_state st_bind_tes_ubos = { }, bind_tes_ubos }; + +static void bind_cs_ubos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_ubos = { + "st_bind_cs_ubos", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_UNIFORM_BUFFER, + }, + bind_cs_ubos +}; diff --git a/src/mesa/state_tracker/st_atom_image.c b/src/mesa/state_tracker/st_atom_image.c new file mode 100644 index 00000000000..4b48bc30b69 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_image.c @@ -0,0 +1,236 @@ +/************************************************************************** + * + * Copyright 2016 Ilia Mirkin. 
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/imports.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "compiler/glsl/ir_uniform.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "st_cb_texture.h" +#include "st_debug.h" +#include "st_texture.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" +#include "st_format.h" + +static void +st_bind_images(struct st_context *st, struct gl_shader *shader, + unsigned shader_type) +{ + unsigned i; + struct pipe_image_view images[MAX_IMAGE_UNIFORMS]; + struct gl_program_constants *c = &st->ctx->Const.Program[shader->Stage]; + + if (!shader || !st->pipe->set_shader_images) + return; + + for (i = 0; i < shader->NumImages; i++) { + struct gl_image_unit *u = &st->ctx->ImageUnits[shader->ImageUnits[i]]; + struct st_texture_object *stObj = st_texture_object(u->TexObj); + struct pipe_image_view *img = &images[i]; + + if (!stObj || + !st_finalize_texture(st->ctx, st->pipe, u->TexObj) || + !stObj->pt) { + memset(img, 0, sizeof(*img)); + continue; + } + + img->resource = stObj->pt; + img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat); + if (stObj->pt->target == PIPE_BUFFER) { + unsigned base, size; + unsigned f, n; + const struct util_format_description *desc + = util_format_description(img->format); + + base = stObj->base.BufferOffset; + assert(base < stObj->pt->width0); + size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize); + + f = (base / (desc->block.bits / 8)) * desc->block.width; + n = (size / (desc->block.bits / 8)) * desc->block.width; + assert(n > 0); + img->u.buf.first_element = f; + img->u.buf.last_element = f + (n - 1); + } else { + img->u.tex.level = u->Level + stObj->base.MinLevel; + if (stObj->pt->target == PIPE_TEXTURE_3D) { + if (u->Layered) { + img->u.tex.first_layer = 0; + img->u.tex.last_layer 
= u_minify(stObj->pt->depth0, img->u.tex.level) - 1; + } else { + img->u.tex.first_layer = u->_Layer; + img->u.tex.last_layer = u->_Layer; + } + } else { + img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer; + img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer; + if (u->Layered && img->resource->array_size > 1) { + if (stObj->base.Immutable) + img->u.tex.last_layer += stObj->base.NumLayers - 1; + else + img->u.tex.last_layer += img->resource->array_size - 1; + } + } + } + } + st->pipe->set_shader_images(st->pipe, shader_type, 0, shader->NumImages, + images); + /* clear out any stale shader images */ + if (shader->NumImages < c->MaxImageUniforms) + st->pipe->set_shader_images( + st->pipe, shader_type, + shader->NumImages, + c->MaxImageUniforms - shader->NumImages, + NULL); +} + +static void bind_vs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_VERTEX], PIPE_SHADER_VERTEX); +} + +const struct st_tracked_state st_bind_vs_images = { + "st_bind_vs_images", + { + 0, + ST_NEW_VERTEX_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_vs_images +}; + +static void bind_fs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], PIPE_SHADER_FRAGMENT); +} + +const struct st_tracked_state st_bind_fs_images = { + "st_bind_fs_images", + { + 0, + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_fs_images +}; + +static void bind_gs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], PIPE_SHADER_GEOMETRY); +} + +const struct st_tracked_state st_bind_gs_images = { + "st_bind_gs_images", + { + 
0, + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_gs_images +}; + +static void bind_tcs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_images = { + "st_bind_tcs_images", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_tcs_images +}; + +static void bind_tes_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_images = { + "st_bind_tes_images", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_tes_images +}; + +static void bind_cs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_images = { + "st_bind_cs_images", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_cs_images +}; diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 94231cf1946..82dcf5ee0ca 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -321,6 +321,14 @@ update_samplers(struct st_context *st) st->state.samplers[PIPE_SHADER_TESS_EVAL], &st->state.num_samplers[PIPE_SHADER_TESS_EVAL]); } + if (ctx->ComputeProgram._Current) { + update_shader_samplers(st, + PIPE_SHADER_COMPUTE, + &ctx->ComputeProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, + st->state.samplers[PIPE_SHADER_COMPUTE], + 
&st->state.num_samplers[PIPE_SHADER_COMPUTE]); + } } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index c8650a5899a..a88f0352746 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -255,3 +255,35 @@ const struct st_tracked_state st_update_tep = { }, update_tep /* update */ }; + + + +static void +update_cp( struct st_context *st ) +{ + struct st_compute_program *stcp; + + if (!st->ctx->ComputeProgram._Current) { + cso_set_compute_shader_handle(st->cso_context, NULL); + return; + } + + stcp = st_compute_program(st->ctx->ComputeProgram._Current); + assert(stcp->Base.Base.Target == GL_COMPUTE_PROGRAM_NV); + + st->cp_variant = st_get_cp_variant(st, &stcp->tgsi, &stcp->variants); + + st_reference_compprog(st, &st->cp, stcp); + + cso_set_compute_shader_handle(st->cso_context, + st->cp_variant->driver_shader); +} + +const struct st_tracked_state st_update_cp = { + "st_update_cp", /* name */ + { /* dirty */ + 0, /* mesa */ + ST_NEW_COMPUTE_PROGRAM /* st */ + }, + update_cp /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c index f165cc3e0a1..37b4c4d256d 100644 --- a/src/mesa/state_tracker/st_atom_storagebuf.c +++ b/src/mesa/state_tracker/st_atom_storagebuf.c @@ -194,3 +194,24 @@ const struct st_tracked_state st_bind_tes_ssbos = { }, bind_tes_ssbos }; + +static void bind_cs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_ssbos = { + "st_bind_cs_ssbos", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_cs_ssbos +}; diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 3e3775200a8..4b7ad77b47a 100644 --- 
a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -32,6 +32,7 @@ */ +#include "main/context.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/samplerobj.h" @@ -147,9 +148,7 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, else return SWIZZLE_XYZW; case GL_STENCIL_INDEX: - return SWIZZLE_XYZW; case GL_DEPTH_STENCIL: - /* fall-through */ case GL_DEPTH_COMPONENT: /* Now examine the depth mode */ switch (depthMode) { @@ -193,15 +192,29 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, static unsigned -get_texture_format_swizzle(const struct st_texture_object *stObj, +get_texture_format_swizzle(const struct st_context *st, + const struct st_texture_object *stObj, unsigned glsl_version) { GLenum baseFormat = _mesa_texture_base_format(&stObj->base); unsigned tex_swizzle; if (baseFormat != GL_NONE) { + GLenum depth_mode = stObj->base.DepthMode; + /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures + * with depth component data specified with a sized internal format. 
+ */ + if (_mesa_is_gles3(st->ctx) && + util_format_is_depth_or_stencil(stObj->pt->format)) { + const struct st_texture_image *firstImage = + st_texture_image_const(_mesa_base_tex_image(&stObj->base)); + if (firstImage->base.InternalFormat != GL_DEPTH_COMPONENT && + firstImage->base.InternalFormat != GL_DEPTH_STENCIL && + firstImage->base.InternalFormat != GL_STENCIL_INDEX) + depth_mode = GL_RED; + } tex_swizzle = compute_texture_format_swizzle(baseFormat, - stObj->base.DepthMode, + depth_mode, stObj->pt->format, glsl_version); } @@ -221,10 +234,11 @@ get_texture_format_swizzle(const struct st_texture_object *stObj, * \param stObj the st texture object, */ static boolean -check_sampler_swizzle(const struct st_texture_object *stObj, +check_sampler_swizzle(const struct st_context *st, + const struct st_texture_object *stObj, struct pipe_sampler_view *sv, unsigned glsl_version) { - unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); + unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version); return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) || (sv->swizzle_g != GET_SWZ(swizzle, 1)) || @@ -251,13 +265,13 @@ static unsigned last_layer(struct st_texture_object *stObj) } static struct pipe_sampler_view * -st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, +st_create_texture_sampler_view_from_stobj(struct st_context *st, struct st_texture_object *stObj, enum pipe_format format, unsigned glsl_version) { struct pipe_sampler_view templ; - unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); + unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version); u_sampler_view_default_template(&templ, stObj->pt, @@ -297,7 +311,7 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, templ.swizzle_a = GET_SWZ(swizzle, 3); } - return pipe->create_sampler_view(pipe, stObj->pt, &templ); + return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ); } @@ -327,7 +341,7 @@ 
st_get_texture_sampler_view_from_stobj(struct st_context *st, /* if sampler view has changed dereference it */ if (*sv) { - if (check_sampler_swizzle(stObj, *sv, glsl_version) || + if (check_sampler_swizzle(st, stObj, *sv, glsl_version) || (format != (*sv)->format) || gl_target_to_pipe(stObj->base.Target) != (*sv)->target || stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level || @@ -339,7 +353,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, } if (!*sv) { - *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, + *sv = st_create_texture_sampler_view_from_stobj(st, stObj, format, glsl_version); } else if ((*sv)->context != st->pipe) { @@ -534,6 +548,22 @@ update_tesseval_textures(struct st_context *st) } +static void +update_compute_textures(struct st_context *st) +{ + const struct gl_context *ctx = st->ctx; + + if (ctx->ComputeProgram._Current) { + update_textures(st, + MESA_SHADER_COMPUTE, + &ctx->ComputeProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, + st->state.sampler_views[PIPE_SHADER_COMPUTE], + &st->state.num_sampler_views[PIPE_SHADER_COMPUTE]); + } +} + + const struct st_tracked_state st_update_fragment_texture = { "st_update_texture", /* name */ { /* dirty */ @@ -582,3 +612,13 @@ const struct st_tracked_state st_update_tesseval_texture = { }, update_tesseval_textures /* update */ }; + + +const struct st_tracked_state st_update_compute_texture = { + "st_update_compute_texture", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + ST_NEW_COMPUTE_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ + }, + update_compute_textures /* update */ +}; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 627b8cbd598..4fd2dfef8cc 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/image.h" #include "main/bufferobj.h" +#include "main/dlist.h" 
#include "main/macros.h" #include "main/pbo.h" #include "program/program.h" @@ -41,6 +42,7 @@ #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" +#include "st_draw.h" #include "st_program.h" #include "st_cb_bitmap.h" #include "st_texture.h" @@ -49,7 +51,6 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "util/u_upload_mgr.h" #include "program/prog_instruction.h" @@ -183,7 +184,8 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, static void setup_render_state(struct gl_context *ctx, struct pipe_sampler_view *sv, - const GLfloat *color) + const GLfloat *color, + bool atlas) { struct st_context *st = st_context(ctx); struct cso_context *cso = st->cso_context; @@ -194,7 +196,7 @@ setup_render_state(struct gl_context *ctx, key.st = st->has_shareable_shaders ? NULL : st; key.bitmap = GL_TRUE; key.clamp_color = st->clamp_frag_color_in_shader && - st->ctx->Color._ClampFragmentColor; + ctx->Color._ClampFragmentColor; fpv = st_get_fp_variant(st, st->fp, &key); @@ -214,18 +216,15 @@ setup_render_state(struct gl_context *ctx, COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave); } - cso_save_rasterizer(cso); - cso_save_fragment_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_viewport(cso); - cso_save_fragment_shader(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_RASTERIZER | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS)); + /* rasterizer state: just scissor */ st->bitmap.rasterizer.scissor = 
ctx->Scissor.EnableFlags & 1; @@ -251,7 +250,10 @@ setup_render_state(struct gl_context *ctx, for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) { samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i]; } - samplers[fpv->bitmap_sampler] = &st->bitmap.sampler; + if (atlas) + samplers[fpv->bitmap_sampler] = &st->bitmap.atlas_sampler; + else + samplers[fpv->bitmap_sampler] = &st->bitmap.sampler; cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num, (const struct pipe_sampler_state **) samplers); } @@ -268,21 +270,12 @@ setup_render_state(struct gl_context *ctx, } /* viewport state: viewport matching window dims */ - { - const GLboolean invert = st->state.fb_orientation == Y_0_TOP; - const GLfloat width = (GLfloat)st->state.framebuffer.width; - const GLfloat height = (GLfloat)st->state.framebuffer.height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * width; - vp.scale[1] = height * (invert ? -0.5f : 0.5f); - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * width; - vp.translate[1] = 0.5f * height; - vp.translate[2] = 0.5f; - cso_set_viewport(cso, &vp); - } + cso_set_viewport_dims(cso, st->state.framebuffer.width, + st->state.framebuffer.height, + st->state.fb_orientation == Y_0_TOP); + + cso_set_vertex_elements(cso, 3, st->util_velems); - cso_set_vertex_elements(cso, 3, st->velems_util_draw); cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); } @@ -296,18 +289,7 @@ restore_render_state(struct gl_context *ctx) struct st_context *st = st_context(ctx); struct cso_context *cso = st->cso_context; - cso_restore_rasterizer(cso); - cso_restore_fragment_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_viewport(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); + cso_restore_state(cso); 
} @@ -322,7 +304,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - struct pipe_resource *vbuf = NULL; const float fb_width = (float) st->state.framebuffer.width; const float fb_height = (float) st->state.framebuffer.height; const float x0 = (float) x; @@ -335,8 +316,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, const float clip_y0 = y0 / fb_height * 2.0f - 1.0f; const float clip_x1 = x1 / fb_width * 2.0f - 1.0f; const float clip_y1 = y1 / fb_height * 2.0f - 1.0f; - float (*vertices)[3][4]; /**< vertex pos + color + texcoord */ - unsigned offset, i; /* limit checks */ { @@ -349,7 +328,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, assert(height <= (GLsizei) maxSize); } - setup_render_state(ctx, sv, color); + setup_render_state(ctx, sv, color, false); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ z = z * 2.0f - 1.0f; @@ -360,62 +339,13 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, tBot = (float) height; } - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4, - &offset, &vbuf, (void **) &vertices); - if (!vbuf) { + if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z, + sLeft, tBot, sRight, tTop, color, 0)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap"); - restore_render_state(ctx); - return; } - /* Positions are in clip coords since we need to do clipping in case - * the bitmap quad goes beyond the window bounds. 
- */ - vertices[0][0][0] = clip_x0; - vertices[0][0][1] = clip_y0; - vertices[0][2][0] = sLeft; - vertices[0][2][1] = tTop; - - vertices[1][0][0] = clip_x1; - vertices[1][0][1] = clip_y0; - vertices[1][2][0] = sRight; - vertices[1][2][1] = tTop; - - vertices[2][0][0] = clip_x1; - vertices[2][0][1] = clip_y1; - vertices[2][2][0] = sRight; - vertices[2][2][1] = tBot; - - vertices[3][0][0] = clip_x0; - vertices[3][0][1] = clip_y1; - vertices[3][2][0] = sLeft; - vertices[3][2][1] = tBot; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - vertices[i][0][2] = z; - vertices[i][0][3] = 1.0f; - vertices[i][1][0] = color[0]; - vertices[i][1][1] = color[1]; - vertices[i][1][2] = color[2]; - vertices[i][1][3] = color[3]; - vertices[i][2][2] = 0.0; /*R*/ - vertices[i][2][3] = 1.0; /*Q*/ - } - - u_upload_unmap(st->uploader); - - util_draw_vertex_buffer(pipe, st->cso_context, vbuf, - cso_get_aux_vertex_buffer_slot(st->cso_context), - offset, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 3); /* attribs/vert */ - restore_render_state(ctx); - pipe_resource_reference(&vbuf, NULL); - /* We uploaded modified constants, need to invalidate them. 
*/ st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS; } @@ -555,7 +485,7 @@ accum_bitmap(struct gl_context *ctx, struct st_context *st = ctx->st; struct bitmap_cache *cache = st->bitmap.cache; int px = -999, py = -999; - const GLfloat z = st->ctx->Current.RasterPos[2]; + const GLfloat z = ctx->Current.RasterPos[2]; if (width > BITMAP_CACHE_WIDTH || height > BITMAP_CACHE_HEIGHT) @@ -566,7 +496,7 @@ accum_bitmap(struct gl_context *ctx, py = y - cache->ypos; if (px < 0 || px + width > BITMAP_CACHE_WIDTH || py < 0 || py + height > BITMAP_CACHE_HEIGHT || - !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) || + !TEST_EQ_4V(ctx->Current.RasterColor, cache->color) || ((fabs(z - cache->zpos) > Z_EPSILON))) { /* This bitmap would extend beyond cache bounds, or the bitmap * color is changing @@ -584,7 +514,7 @@ accum_bitmap(struct gl_context *ctx, cache->ypos = y - py; cache->zpos = z; cache->empty = GL_FALSE; - COPY_4FV(cache->color, st->ctx->Current.RasterColor); + COPY_4FV(cache->color, ctx->Current.RasterColor); } assert(px != -999); @@ -645,6 +575,9 @@ init_bitmap_state(struct st_context *st) st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; st->bitmap.sampler.normalized_coords = st->internal_target == PIPE_TEXTURE_2D; + st->bitmap.atlas_sampler = st->bitmap.sampler; + st->bitmap.atlas_sampler.normalized_coords = 0; + /* init baseline rasterizer state once */ memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); st->bitmap.rasterizer.half_pixel_center = 1; @@ -713,7 +646,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, * explicitly uploaded in the draw_bitmap_quad() function. 
*/ if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); } if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap)) @@ -728,8 +661,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, if (sv) { draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], - width, height, sv, - st->ctx->Current.RasterColor); + width, height, sv, ctx->Current.RasterColor); pipe_sampler_view_reference(&sv, NULL); } @@ -740,11 +672,139 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, } +/** + * Called via ctx->Driver.DrawAtlasBitmap() + */ +static void +st_DrawAtlasBitmaps(struct gl_context *ctx, + const struct gl_bitmap_atlas *atlas, + GLuint count, const GLubyte *ids) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_texture_object *stObj = st_texture_object(atlas->texObj); + struct pipe_sampler_view *sv; + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + const float z = ctx->Current.RasterPos[2] * 2.0f - 1.0f; + const float *color = ctx->Current.RasterColor; + const float clip_x_scale = 2.0f / st->state.framebuffer.width; + const float clip_y_scale = 2.0f / st->state.framebuffer.height; + const unsigned num_verts = count * 4; + const unsigned num_vert_bytes = num_verts * sizeof(struct st_util_vertex); + struct st_util_vertex *verts; + struct pipe_vertex_buffer vb = {0}; + unsigned i; + + if (!st->bitmap.cache) { + init_bitmap_state(st); + } + + st_flush_bitmap_cache(st); + + st_validate_state(st, ST_PIPELINE_RENDER); + + sv = st_create_texture_sampler_view(pipe, stObj->pt); + + setup_render_state(ctx, sv, color, true); + + vb.stride = sizeof(struct st_util_vertex); + + u_upload_alloc(st->uploader, 0, num_vert_bytes, 4, + &vb.buffer_offset, &vb.buffer, (void **) &verts); + + /* build quads vertex data */ + for (i = 0; i < count; i++) { + const GLfloat epsilon = 0.0001F; + const struct gl_bitmap_glyph *g = 
&atlas->glyphs[ids[i]]; + const float xmove = g->xmove, ymove = g->ymove; + const float xorig = g->xorig, yorig = g->yorig; + const float s0 = g->x, t0 = g->y; + const float s1 = s0 + g->w, t1 = t0 + g->h; + const float x0 = IFLOOR(ctx->Current.RasterPos[0] - xorig + epsilon); + const float y0 = IFLOOR(ctx->Current.RasterPos[1] - yorig + epsilon); + const float x1 = x0 + g->w, y1 = y0 + g->h; + const float clip_x0 = x0 * clip_x_scale - 1.0f; + const float clip_y0 = y0 * clip_y_scale - 1.0f; + const float clip_x1 = x1 * clip_x_scale - 1.0f; + const float clip_y1 = y1 * clip_y_scale - 1.0f; + + /* lower-left corner */ + verts->x = clip_x0; + verts->y = clip_y0; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s0; + verts->t = t0; + verts++; + + /* lower-right corner */ + verts->x = clip_x1; + verts->y = clip_y0; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s1; + verts->t = t0; + verts++; + + /* upper-right corner */ + verts->x = clip_x1; + verts->y = clip_y1; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s1; + verts->t = t1; + verts++; + + /* upper-left corner */ + verts->x = clip_x0; + verts->y = clip_y1; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s0; + verts->t = t1; + verts++; + + /* Update the raster position */ + ctx->Current.RasterPos[0] += xmove; + ctx->Current.RasterPos[1] += ymove; + } + + u_upload_unmap(st->uploader); + + cso_set_vertex_buffers(st->cso_context, + cso_get_aux_vertex_buffer_slot(st->cso_context), + 1, &vb); + + cso_draw_arrays(st->cso_context, PIPE_PRIM_QUADS, 0, num_verts); + + restore_render_state(ctx); + + pipe_resource_reference(&vb.buffer, NULL); + + /* We uploaded modified constants, need to invalidate them. 
*/ + st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS; +} + + + /** Per-context init */ void st_init_bitmap_functions(struct dd_function_table *functions) { functions->Bitmap = st_Bitmap; + functions->DrawAtlasBitmaps = st_DrawAtlasBitmaps; } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 7b6d10e76b1..55801469f23 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -44,6 +44,7 @@ #include "st_cb_bitmap.h" #include "st_cb_clear.h" #include "st_cb_fbo.h" +#include "st_draw.h" #include "st_format.h" #include "st_program.h" @@ -55,8 +56,6 @@ #include "util/u_framebuffer.h" #include "util/u_inlines.h" #include "util/u_simple_shaders.h" -#include "util/u_draw_quad.h" -#include "util/u_upload_mgr.h" #include "cso_cache/cso_context.h" @@ -169,67 +168,6 @@ set_vertex_shader_layered(struct st_context *st) /** - * Draw a screen-aligned quadrilateral. - * Coords are clip coords with y=0=bottom. - */ -static void -draw_quad(struct st_context *st, - float x0, float y0, float x1, float y1, GLfloat z, - unsigned num_instances, - const union pipe_color_union *color) -{ - struct cso_context *cso = st->cso_context; - struct pipe_vertex_buffer vb = {0}; - GLuint i; - float (*vertices)[2][4]; /**< vertex pos + color */ - - vb.stride = 8 * sizeof(float); - - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4, - &vb.buffer_offset, &vb.buffer, - (void **) &vertices); - if (!vb.buffer) { - return; - } - - /* Convert Z from [0,1] to [-1,1] range */ - z = z * 2.0f - 1.0f; - - /* positions */ - vertices[0][0][0] = x0; - vertices[0][0][1] = y0; - - vertices[1][0][0] = x1; - vertices[1][0][1] = y0; - - vertices[2][0][0] = x1; - vertices[2][0][1] = y1; - - vertices[3][0][0] = x0; - vertices[3][0][1] = y1; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - vertices[i][0][2] = z; - vertices[i][0][3] = 1.0; - vertices[i][1][0] = color->f[0]; - vertices[i][1][1] = color->f[1]; - vertices[i][1][2] = 
color->f[2]; - vertices[i][1][3] = color->f[3]; - } - - u_upload_unmap(st->uploader); - - /* draw */ - cso_set_vertex_buffers(cso, cso_get_aux_vertex_buffer_slot(cso), 1, &vb); - cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_FAN, 0, 4, - 0, num_instances); - pipe_resource_reference(&vb.buffer, NULL); -} - - - -/** * Do glClear by drawing a quadrilateral. * The vertices of the quad will be computed from the * ctx->DrawBuffer->_X/Ymin/max fields. @@ -238,6 +176,7 @@ static void clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) { struct st_context *st = st_context(ctx); + struct cso_context *cso = st->cso_context; const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLfloat fb_width = (GLfloat) fb->Width; const GLfloat fb_height = (GLfloat) fb->Height; @@ -257,21 +196,17 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) x1, y1); */ - cso_save_blend(st->cso_context); - cso_save_stencil_ref(st->cso_context); - cso_save_depth_stencil_alpha(st->cso_context); - cso_save_rasterizer(st->cso_context); - cso_save_sample_mask(st->cso_context); - cso_save_min_samples(st->cso_context); - cso_save_viewport(st->cso_context); - cso_save_fragment_shader(st->cso_context); - cso_save_stream_outputs(st->cso_context); - cso_save_vertex_shader(st->cso_context); - cso_save_tessctrl_shader(st->cso_context); - cso_save_tesseval_shader(st->cso_context); - cso_save_geometry_shader(st->cso_context); - cso_save_vertex_elements(st->cso_context); - cso_save_aux_vertex_buffer_slot(st->cso_context); + cso_save_state(cso, (CSO_BIT_BLEND | + CSO_BIT_STENCIL_REF | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_RASTERIZER | + CSO_BIT_SAMPLE_MASK | + CSO_BIT_MIN_SAMPLES | + CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS)); /* blend state: RGBA masking */ { @@ -298,10 +233,10 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) blend.rt[i].colormask |= PIPE_MASK_A; } - if 
(st->ctx->Color.DitherFlag) + if (ctx->Color.DitherFlag) blend.dither = 1; } - cso_set_blend(st->cso_context, &blend); + cso_set_blend(cso, &blend); } /* depth_stencil state: always pass/set to ref value */ @@ -325,64 +260,49 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) depth_stencil.stencil[0].valuemask = 0xff; depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; stencil_ref.ref_value[0] = ctx->Stencil.Clear; - cso_set_stencil_ref(st->cso_context, &stencil_ref); + cso_set_stencil_ref(cso, &stencil_ref); } - cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + cso_set_depth_stencil_alpha(cso, &depth_stencil); } - cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); - cso_set_sample_mask(st->cso_context, ~0); - cso_set_min_samples(st->cso_context, 1); - cso_set_rasterizer(st->cso_context, &st->clear.raster); + cso_set_vertex_elements(cso, 2, st->util_velems); + cso_set_stream_outputs(cso, 0, NULL, NULL); + cso_set_sample_mask(cso, ~0); + cso_set_min_samples(cso, 1); + cso_set_rasterizer(cso, &st->clear.raster); /* viewport state: viewport matching window dims */ - { - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * fb_width; - vp.scale[1] = fb_height * (invert ? 
-0.5f : 0.5f); - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * fb_width; - vp.translate[1] = 0.5f * fb_height; - vp.translate[2] = 0.5f; - cso_set_viewport(st->cso_context, &vp); - } + cso_set_viewport_dims(st->cso_context, fb_width, fb_height, + st_fb_orientation(fb) == Y_0_TOP); set_fragment_shader(st); - cso_set_tessctrl_shader_handle(st->cso_context, NULL); - cso_set_tesseval_shader_handle(st->cso_context, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); if (num_layers > 1) set_vertex_shader_layered(st); else set_vertex_shader(st); - /* We can't translate the clear color to the colorbuffer format, + /* draw quad matching scissor rect. + * + * Note: if we're only clearing depth/stencil we still setup vertices + * with color, but they'll be ignored. + * + * We can't translate the clear color to the colorbuffer format, * because different colorbuffers may have different formats. */ - - /* draw quad matching scissor rect */ - draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, num_layers, - (union pipe_color_union*)&ctx->Color.ClearColor); + if (!st_draw_quad(st, x0, y0, x1, y1, + ctx->Depth.Clear * 2.0f - 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + (const float *) &ctx->Color.ClearColor.f, + num_layers)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glClear"); + } /* Restore pipe state */ - cso_restore_blend(st->cso_context); - cso_restore_stencil_ref(st->cso_context); - cso_restore_depth_stencil_alpha(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_sample_mask(st->cso_context); - cso_restore_min_samples(st->cso_context); - cso_restore_viewport(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_tessctrl_shader(st->cso_context); - cso_restore_tesseval_shader(st->cso_context); - cso_restore_geometry_shader(st->cso_context); - cso_restore_vertex_elements(st->cso_context); - cso_restore_aux_vertex_buffer_slot(st->cso_context); - 
cso_restore_stream_outputs(st->cso_context); + cso_restore_state(cso); } @@ -470,7 +390,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask) st_flush_bitmap_cache(st); /* This makes sure the pipe has the latest scissor, etc values */ - st_validate_state( st ); + st_validate_state( st, ST_PIPELINE_RENDER ); if (mask & BUFFER_BITS_COLOR) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c new file mode 100644 index 00000000000..364159d62d8 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_compute.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2016 Samuel Pitoiset + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/state.h" +#include "st_atom.h" +#include "st_context.h" +#include "st_cb_bufferobjects.h" +#include "st_cb_compute.h" + +#include "pipe/p_context.h" + +static void st_dispatch_compute_common(struct gl_context *ctx, + const GLuint *num_groups, + struct pipe_resource *indirect, + GLintptr indirect_offset) +{ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_grid_info info = { 0 }; + + if (ctx->NewState) + _mesa_update_state(ctx); + + if (st->dirty_cp.st || ctx->NewDriverState) + st_validate_state(st, ST_PIPELINE_COMPUTE); + + for (unsigned i = 0; i < 3; i++) { + info.block[i] = prog->Comp.LocalSize[i]; + info.grid[i] = num_groups ? num_groups[i] : 0; + } + + if (indirect) { + info.indirect = indirect; + info.indirect_offset = indirect_offset; + } + + pipe->launch_grid(pipe, &info); +} + +static void st_dispatch_compute(struct gl_context *ctx, + const GLuint *num_groups) +{ + st_dispatch_compute_common(ctx, num_groups, NULL, 0); +} + +static void st_dispatch_compute_indirect(struct gl_context *ctx, + GLintptr indirect_offset) +{ + struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer; + struct pipe_resource *indirect = st_buffer_object(indirect_buffer)->buffer; + + st_dispatch_compute_common(ctx, NULL, indirect, indirect_offset); +} + +void st_init_compute_functions(struct dd_function_table *functions) +{ + functions->DispatchCompute = st_dispatch_compute; + functions->DispatchComputeIndirect = st_dispatch_compute_indirect; +} diff --git a/src/mesa/state_tracker/st_cb_compute.h b/src/mesa/state_tracker/st_cb_compute.h new file mode 100644 index 00000000000..78ec756a69e --- /dev/null +++ b/src/mesa/state_tracker/st_cb_compute.h @@ -0,0 +1,38 @@ +/************************************************************************** + 
* + * Copyright 2016 Samuel Pitoiset + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef ST_CB_COMPUTE_H +#define ST_CB_COMPUTE_H + +#include "main/compiler.h" + +struct dd_function_table; + +extern void +st_init_compute_functions(struct dd_function_table *functions); + +#endif /* ST_CB_COMPUTE_H */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index fd58886a782..51d4ae51918 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -35,6 +35,7 @@ #include "main/bufferobj.h" #include "main/blit.h" #include "main/format_pack.h" +#include "main/framebuffer.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/pack.h" @@ -56,6 +57,7 @@ #include "st_cb_fbo.h" #include "st_context.h" #include "st_debug.h" +#include "st_draw.h" #include "st_format.h" #include "st_program.h" #include "st_texture.h" @@ -63,16 +65,45 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "tgsi/tgsi_ureg.h" -#include "util/u_draw_quad.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_tile.h" -#include "util/u_upload_mgr.h" #include "cso_cache/cso_context.h" /** + * We have a simple glDrawPixels cache to try to optimize the case where the + * same image is drawn over and over again. It basically works as follows: + * + * 1. After we construct a texture map with the image and draw it, we do + * not discard the texture. We keep it around, plus we note the + * glDrawPixels width, height, format, etc. parameters and keep a copy + * of the image in a malloc'd buffer. + * + * 2. On the next glDrawPixels we check if the parameters match the previous + * call. If those match, we check if the image matches the previous image + * via a memcmp() call. If everything matches, we re-use the previous + * texture, thereby avoiding the cost of creating a new texture and copying + * the image to it. 
+ * + * The effectiveness of this cache depends upon: + * 1. If the memcmp() finds a difference, it happens relatively quickly. + * Hopefully, not just the last pixels differ! + * 2. If the memcmp() finds no difference, doing that check is faster than + * creating and loading a texture. + * + * Notes: + * 1. We don't support any pixel unpacking parameters. + * 2. We don't try to cache images in Pixel Buffer Objects. + * 3. Instead of saving the whole image, perhaps some sort of reliable + * checksum function could be used instead. + */ +#define USE_DRAWPIXELS_CACHE 1 + + + +/** * Create fragment program that does a TEX() instruction to get a Z and/or * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). @@ -162,22 +193,22 @@ make_passthrough_vertex_shader(struct st_context *st, return NULL; /* MOV result.pos, vertex.pos; */ - ureg_MOV(ureg, + ureg_MOV(ureg, ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), ureg_DECL_vs_input( ureg, 0 )); - - /* MOV result.texcoord0, vertex.attr[1]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, texcoord_semantic, 0 ), - ureg_DECL_vs_input( ureg, 1 )); - + if (passColor) { - /* MOV result.color0, vertex.attr[2]; */ - ureg_MOV(ureg, + /* MOV result.color0, vertex.attr[1]; */ + ureg_MOV(ureg, ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), - ureg_DECL_vs_input( ureg, 2 )); + ureg_DECL_vs_input( ureg, 1 )); } + /* MOV result.texcoord0, vertex.attr[2]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, texcoord_semantic, 0 ), + ureg_DECL_vs_input( ureg, 2 )); + ureg_END( ureg ); st->drawpix.vert_shaders[passColor] = @@ -347,6 +378,39 @@ make_texture(struct st_context *st, enum pipe_format pipeFormat; GLenum baseInternalFormat; +#if USE_DRAWPIXELS_CACHE + const GLint bpp = _mesa_bytes_per_pixel(format, type); + + /* Check if the glDrawPixels() parameters and state match the cache */ + if (width == st->drawpix_cache.width && + height == 
st->drawpix_cache.height && + format == st->drawpix_cache.format && + type == st->drawpix_cache.type && + pixels == st->drawpix_cache.user_pointer && + !_mesa_is_bufferobj(unpack->BufferObj) && + (unpack->RowLength == 0 || unpack->RowLength == width) && + unpack->SkipPixels == 0 && + unpack->SkipRows == 0 && + unpack->SwapBytes == GL_FALSE && + st->drawpix_cache.image) { + /* check if the pixel data is the same */ + if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) { + /* OK, re-use the cached texture */ + return st->drawpix_cache.texture; + } + } + + /* discard the cached image and texture (if there is one) */ + st->drawpix_cache.width = 0; + st->drawpix_cache.height = 0; + st->drawpix_cache.user_pointer = NULL; + if (st->drawpix_cache.image) { + free(st->drawpix_cache.image); + st->drawpix_cache.image = NULL; + } + pipe_resource_reference(&st->drawpix_cache.texture, NULL); +#endif + /* Choose a pixel format for the temp texture which will hold the * image to draw. */ @@ -437,107 +501,29 @@ make_texture(struct st_context *st, _mesa_unmap_pbo_source(ctx, unpack); - return pt; -} - - -/** - * Draw quad with texcoords and optional color. - * Coords are gallium window coords with y=0=top. 
- * \param color may be null - * \param invertTex if true, flip texcoords vertically - */ -static void -draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, - GLfloat x1, GLfloat y1, const GLfloat *color, - GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - GLfloat (*verts)[3][4]; /* four verts, three attribs, XYZW */ - struct pipe_resource *buf = NULL; - unsigned offset; - - u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), 4, &offset, - &buf, (void **) &verts); - if (!buf) { - return; - } - - /* setup vertex data */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat) fb->Width; - const GLfloat fb_height = (GLfloat) fb->Height; - const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; - const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; - const GLfloat sLeft = 0.0f, sRight = maxXcoord; - const GLfloat tTop = invertTex ? maxYcoord : 0.0f; - const GLfloat tBot = invertTex ? 
0.0f : maxYcoord; - GLuint i; - - /* upper-left */ - verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ - verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ - - /* upper-right */ - verts[1][0][0] = clip_x1; - verts[1][0][1] = clip_y0; - - /* lower-right */ - verts[2][0][0] = clip_x1; - verts[2][0][1] = clip_y1; - - /* lower-left */ - verts[3][0][0] = clip_x0; - verts[3][0][1] = clip_y1; - - verts[0][1][0] = sLeft; /* v[0].attr[1].S */ - verts[0][1][1] = tTop; /* v[0].attr[1].T */ - verts[1][1][0] = sRight; - verts[1][1][1] = tTop; - verts[2][1][0] = sRight; - verts[2][1][1] = tBot; - verts[3][1][0] = sLeft; - verts[3][1][1] = tBot; - - /* same for all verts: */ - if (color) { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /* v[i].attr[0].z */ - verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ - verts[i][2][0] = color[0]; /* v[i].attr[2].r */ - verts[i][2][1] = color[1]; /* v[i].attr[2].g */ - verts[i][2][2] = color[2]; /* v[i].attr[2].b */ - verts[i][2][3] = color[3]; /* v[i].attr[2].a */ - verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ - verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ - } - } - else { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /*Z*/ - verts[i][0][3] = 1.0f; /*W*/ - verts[i][1][2] = 0.0f; /*R*/ - verts[i][1][3] = 1.0f; /*Q*/ - } +#if USE_DRAWPIXELS_CACHE + /* Save the glDrawPixels parameter and image in the cache */ + if ((unpack->RowLength == 0 || unpack->RowLength == width) && + unpack->SkipPixels == 0 && + unpack->SkipRows == 0) { + st->drawpix_cache.width = width; + st->drawpix_cache.height = height; + st->drawpix_cache.format = format; + st->drawpix_cache.type = type; + st->drawpix_cache.user_pointer = pixels; + assert(!st->drawpix_cache.image); + st->drawpix_cache.image = malloc(width * height * bpp); + if (st->drawpix_cache.image) { + memcpy(st->drawpix_cache.image, pixels, width * height * bpp); } + st->drawpix_cache.texture = pt; } +#endif - u_upload_unmap(st->uploader); - util_draw_vertex_buffer(pipe, st->cso_context, buf, - 
cso_get_aux_vertex_buffer_slot(st->cso_context), - offset, - PIPE_PRIM_QUADS, - 4, /* verts */ - 3); /* attribs/vert */ - pipe_resource_reference(&buf, NULL); + return pt; } - static void draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, GLsizei width, GLsizei height, @@ -554,9 +540,12 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct cso_context *cso = st->cso_context; + const unsigned fb_width = _mesa_geometric_width(ctx->DrawBuffer); + const unsigned fb_height = _mesa_geometric_height(ctx->DrawBuffer); GLfloat x0, y0, x1, y1; GLsizei maxSize; boolean normalized = sv[0]->texture->target == PIPE_TEXTURE_2D; + unsigned cso_state_mask; assert(sv[0]->texture->target == st->internal_target); @@ -569,22 +558,19 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, assert(width <= maxSize); assert(height <= maxSize); - cso_save_rasterizer(cso); - cso_save_viewport(cso); - cso_save_fragment_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_fragment_shader(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_state_mask = (CSO_BIT_RASTERIZER | + CSO_BIT_VIEWPORT | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS); if (write_stencil) { - cso_save_depth_stencil_alpha(cso); - cso_save_blend(cso); + cso_state_mask |= (CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_BLEND); } + cso_save_state(cso, cso_state_mask); /* rasterizer state: just scissor */ { @@ -694,28 +680,17 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, } /* viewport state: viewport matching window dims */ - { 
- const float w = (float) ctx->DrawBuffer->Width; - const float h = (float) ctx->DrawBuffer->Height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * w; - vp.scale[1] = -0.5f * h; - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * w; - vp.translate[1] = 0.5f * h; - vp.translate[2] = 0.5f; - cso_set_viewport(cso, &vp); - } + cso_set_viewport_dims(cso, fb_width, fb_height, TRUE); - cso_set_vertex_elements(cso, 3, st->velems_util_draw); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); + cso_set_vertex_elements(cso, 3, st->util_velems); + cso_set_stream_outputs(cso, 0, NULL, NULL); /* Compute Gallium window coords (y=0=top) with pixel zoom. * Recall that these coords are transformed by the current * vertex shader and viewport transformation. */ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { - y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY); + y = fb_height - (int) (y + height * ctx->Pixel.ZoomY); invertTex = !invertTex; } @@ -727,27 +702,27 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ z = z * 2.0f - 1.0f; - draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, - normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, - normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height); + { + const float clip_x0 = x0 / (float) fb_width * 2.0f - 1.0f; + const float clip_y0 = y0 / (float) fb_height * 2.0f - 1.0f; + const float clip_x1 = x1 / (float) fb_width * 2.0f - 1.0f; + const float clip_y1 = y1 / (float) fb_height * 2.0f - 1.0f; + const float maxXcoord = normalized ? + ((float) width / sv[0]->texture->width0) : (float) width; + const float maxYcoord = normalized + ? ((float) height / sv[0]->texture->height0) : (float) height; + const float sLeft = 0.0f, sRight = maxXcoord; + const float tTop = invertTex ? maxYcoord : 0.0f; + const float tBot = invertTex ? 
0.0f : maxYcoord; + + if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z, + sLeft, tBot, sRight, tTop, color, 0)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + } + } /* restore state */ - cso_restore_rasterizer(cso); - cso_restore_viewport(cso); - cso_restore_fragment_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); - if (write_stencil) { - cso_restore_depth_stencil_alpha(cso); - cso_restore_blend(cso); - } + cso_restore_state(cso); } @@ -956,7 +931,7 @@ get_color_fp_variant(struct st_context *st) ctx->Pixel.AlphaScale != 1.0); key.pixelMaps = ctx->Pixel.MapColorFlag; key.clamp_color = st->clamp_frag_color_in_shader && - st->ctx->Color._ClampFragmentColor; + ctx->Color._ClampFragmentColor; fpv = st_get_fp_variant(st, st->fp, &key); @@ -1060,7 +1035,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, { void *driver_vp, *driver_fp; struct st_context *st = st_context(ctx); - const GLfloat *color; struct pipe_context *pipe = st->pipe; GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2] = { NULL }; @@ -1074,7 +1048,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* Limit the size of the glDrawPixels to the max texture size. 
* Strictly speaking, that's not correct but since we don't handle @@ -1106,7 +1080,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, driver_fp = get_drawpix_z_stencil_program(st, write_depth, write_stencil); driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); - color = ctx->Current.RasterColor; } else { fpv = get_color_fp_variant(st); @@ -1114,7 +1087,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, driver_fp = fpv->driver_shader; driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); - color = NULL; if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], st->pixel_xfer.pixelmap_sampler_view); @@ -1172,12 +1144,15 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, num_sampler_view, driver_vp, driver_fp, fpv, - color, GL_FALSE, write_depth, write_stencil); + ctx->Current.RasterColor, + GL_FALSE, write_depth, write_stencil); pipe_sampler_view_reference(&sv[0], NULL); if (num_sampler_view > 1) pipe_sampler_view_reference(&sv[1], NULL); +#if !USE_DRAWPIXELS_CACHE pipe_resource_reference(&pt, NULL); +#endif } @@ -1427,7 +1402,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, struct pipe_sampler_view *sv[2] = { NULL }; struct st_fp_variant *fpv = NULL; int num_sampler_view = 1; - GLfloat *color; enum pipe_format srcFormat; unsigned srcBind; GLboolean invertTex = GL_FALSE; @@ -1436,7 +1410,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (type == GL_DEPTH_STENCIL) { /* XXX make this more efficient */ @@ -1469,7 +1443,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, fpv = get_color_fp_variant(st); rbRead = st_get_color_read_renderbuffer(ctx); - color = NULL; driver_fp = fpv->driver_shader; driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); @@ -1490,7 +1463,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(type == GL_DEPTH); rbRead = 
st_renderbuffer(ctx->ReadBuffer-> Attachment[BUFFER_DEPTH].Renderbuffer); - color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE); driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); @@ -1622,7 +1594,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, num_sampler_view, driver_vp, driver_fp, fpv, - color, invertTex, GL_FALSE, GL_FALSE); + ctx->Current.Attrib[VERT_ATTRIB_COLOR0], + invertTex, GL_FALSE, GL_FALSE); pipe_resource_reference(&pt, NULL); pipe_sampler_view_reference(&sv[0], NULL); diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index e6ab77fb521..a7926295277 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -49,6 +49,7 @@ struct cached_shader /** * Simple linear list cache. * Most of the time there'll only be one cached shader. + * XXX This should be per-st_context state. */ static struct cached_shader CachedShaders[MAX_SHADERS]; static GLuint NumCachedShaders = 0; @@ -99,13 +100,14 @@ lookup_shader(struct pipe_context *pipe, return CachedShaders[i].handle; } + static void st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height) { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; - struct cso_context *cso = ctx->st->cso_context; + struct cso_context *cso = st->cso_context; struct pipe_resource *vbuffer = NULL; GLuint i, numTexCoords, numAttribs; GLboolean emitColor; @@ -116,7 +118,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* determine if we need vertex color */ if (ctx->FragmentProgram._Current->Base.InputsRead & VARYING_BIT_COL0) @@ -150,7 +152,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height; GLfloat *vbuf = 
NULL; - GLuint attr; + GLuint tex_attr; u_upload_alloc(st->uploader, 0, numAttribs * 4 * 4 * sizeof(GLfloat), 4, @@ -158,12 +160,12 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, if (!vbuffer) { return; } - + z = CLAMP(z, 0.0f, 1.0f); /* positions (in clip coords) */ { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLfloat fb_width = (GLfloat)fb->Width; const GLfloat fb_height = (GLfloat)fb->Height; @@ -190,10 +192,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]); semantic_names[1] = TGSI_SEMANTIC_COLOR; semantic_indexes[1] = 0; - attr = 2; + tex_attr = 2; } else { - attr = 1; + tex_attr = 1; } /* texcoords */ @@ -210,17 +212,17 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht; /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/ - SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */ - SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */ - SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */ - SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */ + SET_ATTRIB(0, tex_attr, s0, t0, 0.0f, 1.0f); /* lower left */ + SET_ATTRIB(1, tex_attr, s1, t0, 0.0f, 1.0f); /* lower right */ + SET_ATTRIB(2, tex_attr, s1, t1, 0.0f, 1.0f); /* upper right */ + SET_ATTRIB(3, tex_attr, s0, t1, 0.0f, 1.0f); /* upper left */ - semantic_names[attr] = st->needs_texcoord_semantic ? + semantic_names[tex_attr] = st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; /* XXX: should this use semantic index i instead of 0 ? 
*/ - semantic_indexes[attr] = 0; + semantic_indexes[tex_attr] = 0; - attr++; + tex_attr++; } } @@ -229,15 +231,14 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, #undef SET_ATTRIB } - - cso_save_viewport(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_SHADER | + CSO_BIT_TESSCTRL_SHADER | + CSO_BIT_TESSEVAL_SHADER | + CSO_BIT_GEOMETRY_SHADER | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT)); { void *vs = lookup_shader(pipe, numAttribs, @@ -255,11 +256,11 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } cso_set_vertex_elements(cso, numAttribs, velements); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); + cso_set_stream_outputs(cso, 0, NULL, NULL); /* viewport state: viewport matching window dims */ { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); const GLfloat width = (GLfloat)fb->Width; const GLfloat height = (GLfloat)fb->Height; @@ -273,7 +274,6 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, cso_set_viewport(cso, &vp); } - util_draw_vertex_buffer(pipe, cso, vbuffer, cso_get_aux_vertex_buffer_slot(cso), offset, /* offset */ @@ -281,18 +281,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, 4, /* verts */ numAttribs); /* attribs/vert */ - pipe_resource_reference(&vbuffer, NULL); /* restore state */ - cso_restore_viewport(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - 
cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); + cso_restore_state(cso); } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 2a2eb0992c8..82ab914503b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -44,6 +44,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "st_atom.h" #include "st_context.h" #include "st_cb_fbo.h" #include "st_cb_flush.h" @@ -711,9 +712,17 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer) (void) buffer; - /* add the renderbuffer on demand */ - if (fb->_ColorReadBufferIndex >= 0) + /* Check if we need to allocate a front color buffer. + * Front buffers are often allocated on demand (other color buffers are + * always allocated in advance). + */ + if ((fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT || + fb->_ColorReadBufferIndex == BUFFER_FRONT_RIGHT) && + fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) { + /* add the buffer */ st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex); + st_validate_state(st, ST_PIPELINE_RENDER); + } } diff --git a/src/mesa/state_tracker/st_cb_msaa.c b/src/mesa/state_tracker/st_cb_msaa.c index e9955b62b8f..d581f2121b0 100644 --- a/src/mesa/state_tracker/st_cb_msaa.c +++ b/src/mesa/state_tracker/st_cb_msaa.c @@ -44,7 +44,7 @@ st_GetSamplePosition(struct gl_context *ctx, { struct st_context *st = st_context(ctx); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (st->pipe->get_sample_position) st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples, diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index ca493d84715..27cc0f3d154 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -74,6 +74,9 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program 
*prog) case GL_TESS_EVALUATION_PROGRAM_NV: st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; break; + case GL_COMPUTE_PROGRAM_NV: + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; + break; } } @@ -92,6 +95,7 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg) st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; } @@ -123,6 +127,10 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program); return _mesa_init_gl_program(&prog->Base.Base, target, id); } + case GL_COMPUTE_PROGRAM_NV: { + struct st_compute_program *prog = ST_CALLOC_STRUCT(st_compute_program); + return _mesa_init_gl_program(&prog->Base.Base, target, id); + } default: assert(0); return NULL; @@ -195,6 +203,17 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi); } break; + case GL_COMPUTE_PROGRAM_NV: + { + struct st_compute_program *stcp = + (struct st_compute_program *) prog; + + st_release_cp_variants(st, stcp); + + if (stcp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi); + } + break; default: assert(0); /* problem */ } @@ -272,6 +291,17 @@ st_program_string_notify( struct gl_context *ctx, if (st->tep == sttep) st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; } + else if (target == GL_COMPUTE_PROGRAM_NV) { + struct st_compute_program *stcp = + (struct st_compute_program *) prog; + + st_release_cp_variants(st, stcp); + if (!st_translate_compute_program(st, stcp)) + return false; + + if (st->cp == stcp) + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; + } if (ST_DEBUG & DEBUG_PRECOMPILE || st->shader_has_one_variant[stage]) diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 747b41464ae..eec72f8a412 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ 
b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -248,7 +248,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4]) draw_set_rasterize_stage(st->draw, st->rastpos_stage); /* make sure everything's up to date */ - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* This will get set only if rastpos_point(), above, gets called */ ctx->Current.RasterPosValid = GL_FALSE; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index bb36e6969d6..5153c4bbba1 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -81,7 +81,7 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, * we do here should be free in such cases. */ static void -st_readpixels(struct gl_context *ctx, GLint x, GLint y, +st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, @@ -104,7 +104,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, /* Validate state (to be sure we have up-to-date framebuffer surfaces) * and flush the bitmap cache prior to reading. 
*/ - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); st_flush_bitmap_cache(st); if (!st->prefer_blit_based_texture_transfer) { @@ -257,5 +257,5 @@ fallback: void st_init_readpixels_functions(struct dd_function_table *functions) { - functions->ReadPixels = st_readpixels; + functions->ReadPixels = st_ReadPixels; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index d53126a9441..cfec627f10c 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1272,10 +1272,11 @@ create_pbo_upload_fs(struct st_context *st) ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); } + /* temp0.w = 0 */ + ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0)); + /* out = txf(sampler, temp0.x) */ - ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, - ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X), - sampler); + ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler); ureg_release_temporary(ureg, temp0); @@ -1297,6 +1298,7 @@ try_pbo_upload_common(struct gl_context *ctx, unsigned image_height) { struct st_context *st = st_context(ctx); + struct cso_context *cso = st->cso_context; struct pipe_context *pipe = st->pipe; unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1; unsigned skip_pixels = 0; @@ -1333,6 +1335,20 @@ try_pbo_upload_common(struct gl_context *ctx, return false; } + cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BIT_FRAMEBUFFER | + CSO_BIT_VIEWPORT | + CSO_BIT_BLEND | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_RASTERIZER | + CSO_BIT_STREAM_OUTPUTS | + CSO_BITS_ALL_SHADERS)); + cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); + + /* Set up the sampler_view */ { unsigned first_element = buf_offset; @@ -1340,14 +1356,17 @@ try_pbo_upload_common(struct gl_context *ctx, + (upload_height - 1 + (depth - 1) * image_height) * 
stride; struct pipe_sampler_view templ; struct pipe_sampler_view *sampler_view; + struct pipe_sampler_state sampler = {0}; + const struct pipe_sampler_state *samplers[1] = {&sampler}; /* This should be ensured by Mesa before calling our callbacks */ assert((last_element + 1) * bytes_per_pixel <= buffer->width0); if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1) - return false; + goto fail; memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_BUFFER; templ.format = src_format; templ.u.buf.first_element = first_element; templ.u.buf.last_element = last_element; @@ -1358,13 +1377,13 @@ try_pbo_upload_common(struct gl_context *ctx, sampler_view = pipe->create_sampler_view(pipe, buffer, &templ); if (sampler_view == NULL) - return false; + goto fail; - cso_save_fragment_sampler_views(st->cso_context); - cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1, - &sampler_view); + cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view); pipe_sampler_view_reference(&sampler_view, NULL); + + cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); } /* Upload vertices */ @@ -1386,7 +1405,7 @@ try_pbo_upload_common(struct gl_context *ctx, u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4, &vbo.buffer_offset, &vbo.buffer, (void **) &verts); if (!verts) - goto fail_vertex_upload; + goto fail; verts[0] = x0; verts[1] = y0; @@ -1401,30 +1420,28 @@ try_pbo_upload_common(struct gl_context *ctx, velem.src_offset = 0; velem.instance_divisor = 0; - velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context); + velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso); velem.src_format = PIPE_FORMAT_R32G32_FLOAT; - cso_save_vertex_elements(st->cso_context); - cso_set_vertex_elements(st->cso_context, 1, &velem); + cso_set_vertex_elements(cso, 1, &velem); - cso_save_aux_vertex_buffer_slot(st->cso_context); - cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index, - 1, &vbo); + cso_set_vertex_buffers(cso, 
velem.vertex_buffer_index, 1, &vbo); pipe_resource_reference(&vbo.buffer, NULL); } /* Upload constants */ + /* Note: the user buffer must be valid until draw time */ + struct { + int32_t xoffset; + int32_t yoffset; + int32_t stride; + int32_t image_size; + } constants; + { struct pipe_constant_buffer cb; - struct { - int32_t xoffset; - int32_t yoffset; - int32_t stride; - int32_t image_size; - } constants; - constants.xoffset = -xoffset + skip_pixels; constants.yoffset = -yoffset; constants.stride = stride; @@ -1434,10 +1451,10 @@ try_pbo_upload_common(struct gl_context *ctx, cb.buffer = NULL; cb.user_buffer = NULL; u_upload_data(st->constbuf_uploader, 0, sizeof(constants), - st->ctx->Const.UniformBufferOffsetAlignment, + ctx->Const.UniformBufferOffsetAlignment, &constants, &cb.buffer_offset, &cb.buffer); if (!cb.buffer) - goto fail_constant_upload; + goto fail; u_upload_unmap(st->constbuf_uploader); } else { @@ -1447,8 +1464,7 @@ try_pbo_upload_common(struct gl_context *ctx, } cb.buffer_size = sizeof(constants); - cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); - cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb); + cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb); pipe_resource_reference(&cb.buffer, NULL); } @@ -1462,80 +1478,52 @@ try_pbo_upload_common(struct gl_context *ctx, fb.nr_cbufs = 1; pipe_surface_reference(&fb.cbufs[0], surface); - cso_save_framebuffer(st->cso_context); - cso_set_framebuffer(st->cso_context, &fb); + cso_set_framebuffer(cso, &fb); pipe_surface_reference(&fb.cbufs[0], NULL); } - /* Viewport state */ - { - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * surface->width; - vp.scale[1] = 0.5f * surface->height; - vp.scale[2] = 1.0f; - vp.translate[0] = 0.5f * surface->width; - vp.translate[1] = 0.5f * surface->height; - vp.translate[2] = 0.0f; - - cso_save_viewport(st->cso_context); - cso_set_viewport(st->cso_context, &vp); - } + cso_set_viewport_dims(cso, surface->width, 
surface->height, FALSE); /* Blend state */ - cso_save_blend(st->cso_context); - cso_set_blend(st->cso_context, &st->pbo_upload.blend); + cso_set_blend(cso, &st->pbo_upload.blend); + + /* Depth/stencil/alpha state */ + { + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); + cso_set_depth_stencil_alpha(cso, &dsa); + } /* Rasterizer state */ - cso_save_rasterizer(st->cso_context); - cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster); + cso_set_rasterizer(cso, &st->pbo_upload.raster); /* Set up the shaders */ - cso_save_vertex_shader(st->cso_context); - cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs); + cso_set_vertex_shader_handle(cso, st->pbo_upload.vs); - cso_save_geometry_shader(st->cso_context); - cso_set_geometry_shader_handle(st->cso_context, - depth != 1 ? st->pbo_upload.gs : NULL); + cso_set_geometry_shader_handle(cso, depth != 1 ? st->pbo_upload.gs : NULL); - cso_save_tessctrl_shader(st->cso_context); - cso_set_tessctrl_shader_handle(st->cso_context, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); - cso_save_tesseval_shader(st->cso_context); - cso_set_tesseval_shader_handle(st->cso_context, NULL); + cso_set_tesseval_shader_handle(cso, NULL); - cso_save_fragment_shader(st->cso_context); - cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs); + cso_set_fragment_shader_handle(cso, st->pbo_upload.fs); /* Disable stream output */ - cso_save_stream_outputs(st->cso_context); - cso_set_stream_outputs(st->cso_context, 0, NULL, 0); + cso_set_stream_outputs(cso, 0, NULL, 0); if (depth == 1) { - cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); } else { - cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, + cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4, 0, depth); } success = true; - cso_restore_framebuffer(st->cso_context); - cso_restore_viewport(st->cso_context); - 
cso_restore_blend(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_geometry_shader(st->cso_context); - cso_restore_tessctrl_shader(st->cso_context); - cso_restore_tesseval_shader(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_stream_outputs(st->cso_context); - cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); -fail_constant_upload: - cso_restore_vertex_elements(st->cso_context); - cso_restore_aux_vertex_buffer_slot(st->cso_context); -fail_vertex_upload: - cso_restore_fragment_sampler_views(st->cso_context); +fail: + cso_restore_state(cso); + cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); return success; } @@ -2752,7 +2740,7 @@ st_finalize_texture(struct gl_context *ctx, { struct st_context *st = st_context(ctx); struct st_texture_object *stObj = st_texture_object(tObj); - const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + const GLuint nr_faces = _mesa_num_tex_faces(stObj->base.Target); GLuint face; const struct st_texture_image *firstImage; enum pipe_format firstImageFormat; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 9016846b148..e3ddee660f7 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -43,6 +43,7 @@ #include "st_cb_blit.h" #include "st_cb_bufferobjects.h" #include "st_cb_clear.h" +#include "st_cb_compute.h" #include "st_cb_condrender.h" #include "st_cb_copyimage.h" #include "st_cb_drawpixels.h" @@ -138,8 +139,11 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state) st->dirty.st |= ST_NEW_VERTEX_PROGRAM; } + /* Invalidate render and compute pipelines. */ st->dirty.mesa |= new_state; st->dirty.st |= ST_NEW_MESA; + st->dirty_cp.mesa |= new_state; + st->dirty_cp.st |= ST_NEW_MESA; /* This is the only core Mesa module we depend upon. * No longer use swrast, swsetup, tnl. 
@@ -182,6 +186,10 @@ st_destroy_context_priv(struct st_context *st) u_upload_destroy(st->constbuf_uploader); } + /* free glDrawPixels cache data */ + free(st->drawpix_cache.image); + pipe_resource_reference(&st->drawpix_cache.texture, NULL); + cso_destroy_context(st->cso_context); free( st ); } @@ -208,8 +216,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* state tracker needs the VBO module */ _vbo_CreateContext(ctx); + /* Initialize render and compute pipelines flags */ st->dirty.mesa = ~0; st->dirty.st = ~0; + st->dirty_cp.mesa = ~0; + st->dirty_cp.st = ~0; /* Create upload manager for vertex data for glBitmap, glDrawPixels, * glClear, etc. @@ -241,16 +252,30 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, else st->internal_target = PIPE_TEXTURE_RECT; - /* Vertex element objects used for drawing rectangles for glBitmap, - * glDrawPixels, glClear, etc. + /* Setup vertex element info for 'struct st_util_vertex'. */ - for (i = 0; i < ARRAY_SIZE(st->velems_util_draw); i++) { - memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element)); - st->velems_util_draw[i].src_offset = i * 4 * sizeof(float); - st->velems_util_draw[i].instance_divisor = 0; - st->velems_util_draw[i].vertex_buffer_index = - cso_get_aux_vertex_buffer_slot(st->cso_context); - st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + { + const unsigned slot = cso_get_aux_vertex_buffer_slot(st->cso_context); + + /* If this assertion ever fails all state tracker calls to + * cso_get_aux_vertex_buffer_slot() should be audited. This + * particular call would have to be moved to just before each + * drawing call. 
+ */ + assert(slot == 0); + + STATIC_ASSERT(sizeof(struct st_util_vertex) == 9 * sizeof(float)); + + memset(&st->util_velems, 0, sizeof(st->util_velems)); + st->util_velems[0].src_offset = 0; + st->util_velems[0].vertex_buffer_index = slot; + st->util_velems[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + st->util_velems[1].src_offset = 3 * sizeof(float); + st->util_velems[1].vertex_buffer_index = slot; + st->util_velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + st->util_velems[2].src_offset = 7 * sizeof(float); + st->util_velems[2].vertex_buffer_index = slot; + st->util_velems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; } /* we want all vertex data to be placed in buffer objects */ @@ -262,9 +287,9 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* Need these flags: */ - st->ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; st->has_stencil_export = screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT); @@ -328,8 +353,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* called after _mesa_create_context/_mesa_init_point, fix default user * settable max point size up */ - st->ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize, - ctx->Const.MaxPointSizeAA); + ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize, + ctx->Const.MaxPointSizeAA); /* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3; @@ -377,6 +402,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f) f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS; f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER; f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER; + f->NewImageUnits = ST_NEW_IMAGE_UNITS; } struct st_context *st_create_context(gl_api api, struct pipe_context 
*pipe, @@ -440,6 +466,7 @@ void st_destroy_context( struct st_context *st ) st_reference_vertprog(st, &st->vp, NULL); st_reference_tesscprog(st, &st->tcp, NULL); st_reference_tesseprog(st, &st->tep, NULL); + st_reference_compprog(st, &st->cp, NULL); /* release framebuffer surfaces */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { @@ -449,7 +476,7 @@ void st_destroy_context( struct st_context *st ) pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL); pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL); - _vbo_DestroyContext(st->ctx); + _vbo_DestroyContext(ctx); st_destroy_program_variants(st); @@ -503,6 +530,7 @@ void st_init_driver_functions(struct pipe_screen *screen, st_init_flush_functions(screen, functions); st_init_string_functions(functions); st_init_viewport_functions(functions); + st_init_compute_functions(functions); st_init_xformfb_functions(functions); st_init_syncobj_functions(functions); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 9a80f4bae70..f960c64cbe8 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -64,6 +64,8 @@ struct u_upload_mgr; #define ST_NEW_SAMPLER_VIEWS (1 << 11) #define ST_NEW_ATOMIC_BUFFER (1 << 12) #define ST_NEW_STORAGE_BUFFER (1 << 13) +#define ST_NEW_COMPUTE_PROGRAM (1 << 14) +#define ST_NEW_IMAGE_UNITS (1 << 15) struct st_state_flags { @@ -78,6 +80,23 @@ struct st_tracked_state { }; +/** + * Enumeration of state tracker pipelines. + */ +enum st_pipeline { + ST_PIPELINE_RENDER, + ST_PIPELINE_COMPUTE, +}; + + +/** For drawing quads for glClear, glDraw/CopyPixels, glBitmap, etc. 
*/ +struct st_util_vertex +{ + float x, y, z; + float r, g, b, a; + float s, t; +}; + struct st_context { @@ -153,6 +172,7 @@ struct st_context char renderer[100]; struct st_state_flags dirty; + struct st_state_flags dirty_cp; GLboolean vertdata_edgeflags; GLboolean edgeflag_culls_prims; @@ -165,12 +185,14 @@ struct st_context struct st_geometry_program *gp; /**< Currently bound geometry program */ struct st_tessctrl_program *tcp; /**< Currently bound tess control program */ struct st_tesseval_program *tep; /**< Currently bound tess eval program */ + struct st_compute_program *cp; /**< Currently bound compute program */ struct st_vp_variant *vp_variant; struct st_fp_variant *fp_variant; struct st_basic_variant *gp_variant; struct st_basic_variant *tcp_variant; struct st_basic_variant *tep_variant; + struct st_basic_variant *cp_variant; struct gl_texture_object *default_texture; @@ -183,6 +205,7 @@ struct st_context struct { struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_sampler_state atlas_sampler; enum pipe_format tex_format; void *vs; struct bitmap_cache *cache; @@ -194,6 +217,14 @@ struct st_context void *vert_shaders[2]; /**< ureg shaders */ } drawpix; + struct { + GLsizei width, height; + GLenum format, type; + const void *user_pointer; /**< Last user 'pixels' pointer */ + void *image; /**< Copy of the glDrawPixels image data */ + struct pipe_resource *texture; + } drawpix_cache; + /** for glClear */ struct { struct pipe_rasterizer_state raster; @@ -217,8 +248,8 @@ struct st_context bool use_gs; } pbo_upload; - /** used for anything using util_draw_vertex_buffer */ - struct pipe_vertex_element velems_util_draw[3]; + /** for drawing with st_util_vertex */ + struct pipe_vertex_element util_velems[3]; void *passthrough_fs; /**< simple pass-through frag shader */ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 10e294cd147..2de6620602d 100644 --- a/src/mesa/state_tracker/st_draw.c +++ 
b/src/mesa/state_tracker/st_draw.c @@ -60,7 +60,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" +#include "util/u_draw.h" #include "util/u_upload_mgr.h" #include "draw/draw_context.h" #include "cso_cache/cso_context.h" @@ -202,7 +202,7 @@ st_draw_vbo(struct gl_context *ctx, /* Validate state. */ if (st->dirty.st || ctx->NewDriverState) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); #if 0 if (MESA_VERBOSE & VERBOSE_GLSL) { @@ -315,7 +315,7 @@ st_indirect_draw_vbo(struct gl_context *ctx, /* Validate state. */ if (st->dirty.st || ctx->NewDriverState) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); } if (st->vertex_array_out_of_memory) { @@ -398,3 +398,93 @@ st_destroy_draw(struct st_context *st) { draw_destroy(st->draw); } + + +/** + * Draw a quad with given position, texcoords and color. + */ +bool +st_draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, float z, + float s0, float t0, float s1, float t1, + const float *color, + unsigned num_instances) +{ + struct pipe_vertex_buffer vb = {0}; + struct st_util_vertex *verts; + + vb.stride = sizeof(struct st_util_vertex); + + u_upload_alloc(st->uploader, 0, 4 * sizeof(struct st_util_vertex), 4, + &vb.buffer_offset, &vb.buffer, (void **) &verts); + if (!vb.buffer) { + return false; + } + + /* lower-left */ + verts[0].x = x0; + verts[0].y = y1; + verts[0].z = z; + verts[0].r = color[0]; + verts[0].g = color[1]; + verts[0].b = color[2]; + verts[0].a = color[3]; + verts[0].s = s0; + verts[0].t = t0; + + /* lower-right */ + verts[1].x = x1; + verts[1].y = y1; + verts[1].z = z; + verts[1].r = color[0]; + verts[1].g = color[1]; + verts[1].b = color[2]; + verts[1].a = color[3]; + verts[1].s = s1; + verts[1].t = t0; + + /* upper-right */ + verts[2].x = x1; + verts[2].y = y0; + verts[2].z = z; + verts[2].r = color[0]; + verts[2].g = color[1]; + verts[2].b = color[2]; + verts[2].a = color[3]; + 
verts[2].s = s1; + verts[2].t = t1; + + /* upper-left */ + verts[3].x = x0; + verts[3].y = y0; + verts[3].z = z; + verts[3].r = color[0]; + verts[3].g = color[1]; + verts[3].b = color[2]; + verts[3].a = color[3]; + verts[3].s = s0; + verts[3].t = t1; + + u_upload_unmap(st->uploader); + + /* At the time of writing, cso_get_aux_vertex_buffer_slot() always returns + * zero. If that ever changes we need to audit the calls to that function + * and make sure the slot number is used consistently everywhere. + */ + assert(cso_get_aux_vertex_buffer_slot(st->cso_context) == 0); + + cso_set_vertex_buffers(st->cso_context, + cso_get_aux_vertex_buffer_slot(st->cso_context), + 1, &vb); + + if (num_instances > 1) { + cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4, + 0, num_instances); + } else { + cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4); + } + + pipe_resource_reference(&vb.buffer, NULL); + + return true; +} diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index a973c8a4a5d..d85c3b7facd 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -85,4 +85,11 @@ pointer_to_offset(const void *ptr) } +bool +st_draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, float z, + float s0, float t0, float s1, float t1, + const float *color, + unsigned num_instances); + #endif diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index b6e6dea5b27..9f48945d74d 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -140,7 +140,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (!index_bounds_valid) vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 
feabe6290eb..24c64447f44 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -75,6 +75,7 @@ static int _clamp(int a, int min, int max) void st_init_limits(struct pipe_screen *screen, struct gl_constants *c, struct gl_extensions *extensions) { + int supported_irs; unsigned sh; boolean can_ubo = TRUE; @@ -174,9 +175,19 @@ void st_init_limits(struct pipe_screen *screen, pc = &c->Program[MESA_SHADER_TESS_EVAL]; options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL]; break; + case PIPE_SHADER_COMPUTE: + pc = &c->Program[MESA_SHADER_COMPUTE]; + options = &c->ShaderCompilerOptions[MESA_SHADER_COMPUTE]; + + if (!screen->get_param(screen, PIPE_CAP_COMPUTE)) + continue; + supported_irs = + screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS); + if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI))) + continue; + break; default: - /* compute shader, etc. */ - continue; + assert(0); } pc->MaxTextureImageUnits = @@ -223,6 +234,9 @@ void st_init_limits(struct pipe_screen *screen, screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2; pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers; + pc->MaxImageUniforms = screen->get_shader_param( + screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES); + /* Gallium doesn't really care about local vs. env parameters so use the * same limits. 
*/ @@ -261,6 +275,9 @@ void st_init_limits(struct pipe_screen *screen, options->LowerClipDistance = true; options->LowerBufferInterfaceBlocks = true; + + if (sh == PIPE_SHADER_COMPUTE) + options->LowerShaderSharedVariables = true; } c->LowerTessLevel = true; @@ -270,7 +287,8 @@ void st_init_limits(struct pipe_screen *screen, c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits + c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits + c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + - c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits, + c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits + + c->Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, MAX_COMBINED_TEXTURE_IMAGE_UNITS); /* This depends on program constants. */ @@ -330,7 +348,8 @@ void st_init_limits(struct pipe_screen *screen, c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks + c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks + c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks + - c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks; + c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks + + c->Program[MESA_SHADER_COMPUTE].MaxUniformBlocks; assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS); } @@ -363,6 +382,21 @@ void st_init_limits(struct pipe_screen *screen, c->MaxShaderStorageBlockSize = 1 << 27; extensions->ARB_shader_storage_buffer_object = GL_TRUE; } + + c->MaxCombinedImageUniforms = + c->Program[MESA_SHADER_VERTEX].MaxImageUniforms + + c->Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms + + c->Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms + + c->Program[MESA_SHADER_GEOMETRY].MaxImageUniforms + + c->Program[MESA_SHADER_FRAGMENT].MaxImageUniforms + + c->Program[MESA_SHADER_COMPUTE].MaxImageUniforms; + c->MaxCombinedShaderOutputResources += c->MaxCombinedImageUniforms; + c->MaxImageUnits = MAX_IMAGE_UNITS; + c->MaxImageSamples = 0; /* XXX */ + if (c->MaxCombinedImageUniforms) { + extensions->ARB_shader_image_load_store = GL_TRUE; + extensions->ARB_shader_image_size = 
GL_TRUE; + } } @@ -1012,4 +1046,31 @@ void st_init_extensions(struct pipe_screen *screen, if ((ST_DEBUG & DEBUG_GREMEDY) && screen->get_param(screen, PIPE_CAP_STRING_MARKER)) extensions->GREMEDY_string_marker = GL_TRUE; + + if (screen->get_param(screen, PIPE_CAP_COMPUTE)) { + int compute_supported_irs = + screen->get_shader_param(screen, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_SUPPORTED_IRS); + if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + uint64_t grid_size[3], block_size[3]; + + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, + grid_size); + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, + block_size); + screen->get_compute_param(screen, + PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, + &consts->MaxComputeWorkGroupInvocations); + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + &consts->MaxComputeSharedMemorySize); + + for (i = 0; i < 3; i++) { + consts->MaxComputeWorkGroupCount[i] = grid_size[i]; + consts->MaxComputeWorkGroupSize[i] = block_size[i]; + } + /* XXX: ARB_compute_shader is not enabled by default because images + * support is still not implemented yet. 
*/ + /* extensions->ARB_compute_shader = true; */ + } + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 2b92bade440..82bf3a185ad 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1484,6 +1484,48 @@ static const struct format_mapping format_map[] = { { PIPE_FORMAT_ETC1_RGB8, 0 } }, + /* ETC2 */ + { + { GL_COMPRESSED_RGB8_ETC2, 0 }, + { PIPE_FORMAT_ETC2_RGB8, 0 } + }, + { + { GL_COMPRESSED_SRGB8_ETC2, 0 }, + { PIPE_FORMAT_ETC2_SRGB8, 0 } + }, + { + { GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 }, + { PIPE_FORMAT_ETC2_RGB8A1, 0 } + }, + { + { GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 }, + { PIPE_FORMAT_ETC2_SRGB8A1, 0 } + }, + { + { GL_COMPRESSED_RGBA8_ETC2_EAC, 0 }, + { PIPE_FORMAT_ETC2_RGBA8, 0 } + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, 0 }, + { PIPE_FORMAT_ETC2_SRGBA8, 0 } + }, + { + { GL_COMPRESSED_R11_EAC, 0 }, + { PIPE_FORMAT_ETC2_R11_UNORM, 0 } + }, + { + { GL_COMPRESSED_SIGNED_R11_EAC, 0 }, + { PIPE_FORMAT_ETC2_R11_SNORM, 0 } + }, + { + { GL_COMPRESSED_RG11_EAC, 0 }, + { PIPE_FORMAT_ETC2_RG11_UNORM, 0 } + }, + { + { GL_COMPRESSED_SIGNED_RG11_EAC, 0 }, + { PIPE_FORMAT_ETC2_RG11_SNORM, 0 } + }, + /* BPTC */ { { GL_COMPRESSED_RGBA_BPTC_UNORM, 0 }, diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 90e00e8ebf6..3e10aa64bc6 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -36,6 +36,10 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" +#ifdef __cplusplus +extern "C" { +#endif + struct gl_context; struct pipe_screen; @@ -77,4 +81,8 @@ st_translate_color(const union gl_color_union *colorIn, union pipe_color_union *colorOut, GLenum baseFormat, GLboolean is_integer); +#ifdef __cplusplus +} +#endif + #endif /* ST_FORMAT_H */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 2ad91ecf4df..943582d447a 100644 --- 
a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -40,6 +40,7 @@ #include "main/shaderobj.h" #include "main/uniforms.h" #include "main/shaderapi.h" +#include "main/shaderimage.h" #include "program/prog_instruction.h" #include "pipe/p_context.h" @@ -50,6 +51,7 @@ #include "util/u_memory.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "st_format.h" #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ @@ -262,6 +264,7 @@ public: int tex_target; /**< One of TEXTURE_*_INDEX */ glsl_base_type tex_type; GLboolean tex_shadow; + unsigned image_format; st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned tex_offset_num_offset; @@ -395,6 +398,9 @@ public: glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ int buffers_used; + int images_used; + int image_targets[PIPE_MAX_SHADER_IMAGES]; + unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; bool indirect_addr_consts; int wpos_transform_const; @@ -402,6 +408,7 @@ public: bool native_integers; bool have_sqrt; bool have_fma; + bool use_shared_memory; variable_storage *find_variable_storage(ir_variable *var); @@ -451,6 +458,8 @@ public: void visit_atomic_counter_intrinsic(ir_call *); void visit_ssbo_intrinsic(ir_call *); void visit_membar_intrinsic(ir_call *); + void visit_shared_intrinsic(ir_call *); + void visit_image_intrinsic(ir_call *); st_src_reg result; @@ -1214,6 +1223,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input) case GLSL_TYPE_INTERFACE: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -1969,6 +1979,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_u2i: /* Converting between signed and unsigned integers is a no-op. 
*/ result_src = op[0]; + result_src.type = result_dst.type; break; case ir_unop_b2i: if (native_integers) { @@ -3341,6 +3352,239 @@ glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) } void +glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); + + /* Calculate the surface offset */ + offset->accept(this); + st_src_reg off = this->result; + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_load_shared", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_store_shared", callee)) { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + dst.writemask = write_mask->value.u[0]; + + dst.type = this->result.type; + inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); + inst->buffer = buffer; + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add_shared", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and_shared", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if 
(!strcmp("__intrinsic_atomic_or_shared", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor_shared", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange_shared", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, off, data, data2); + inst->buffer = buffer; + } +} + +void +glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_dereference *img = (ir_dereference *)param; + const ir_variable *imgvar = img->variable_referenced(); + const glsl_type *type = imgvar->type->without_array(); + unsigned sampler_array_size = 1, sampler_base = 0; + + st_src_reg reladdr; + st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); + + get_deref_offsets(img, &sampler_array_size, &sampler_base, + (unsigned int *)&image.index, &reladdr); + if (reladdr.file != PROGRAM_UNDEFINED) { + emit_arl(ir, sampler_reladdr, reladdr); + image.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr)); + } + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_image_size", callee)) { + dst.writemask = WRITEMASK_XYZ; + inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); + } else if (!strcmp("__intrinsic_image_samples", callee)) { + st_src_reg res = get_temp(glsl_type::ivec4_type); + st_dst_reg dstres = st_dst_reg(res); + dstres.writemask = WRITEMASK_W; + emit_asm(ir, 
TGSI_OPCODE_RESQ, dstres); + res.swizzle = SWIZZLE_WWWW; + inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res); + } else { + st_src_reg arg1 = undef_src, arg2 = undef_src; + st_src_reg coord; + st_dst_reg coord_dst; + coord = get_temp(glsl_type::ivec4_type); + coord_dst = st_dst_reg(coord); + coord_dst.writemask = (1 << type->coordinate_components()) - 1; + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord.swizzle = SWIZZLE_XXXX; + switch (type->coordinate_components()) { + case 4: assert(!"unexpected coord count"); + /* fallthrough */ + case 3: coord.swizzle |= SWIZZLE_Z << 6; + /* fallthrough */ + case 2: coord.swizzle |= SWIZZLE_Y << 3; + } + + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + st_src_reg sample = this->result; + sample.swizzle = SWIZZLE_XXXX; + coord_dst.writemask = WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); + coord.swizzle |= SWIZZLE_W << 9; + } + + param = param->get_next(); + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg1 = this->result; + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg2 = this->result; + param = param->get_next(); + } + + assert(param->is_tail_sentinel()); + + unsigned opcode; + if (!strcmp("__intrinsic_image_load", callee)) + opcode = TGSI_OPCODE_LOAD; + else if (!strcmp("__intrinsic_image_store", callee)) + opcode = TGSI_OPCODE_STORE; + else if (!strcmp("__intrinsic_image_atomic_add", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_image_atomic_min", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_image_atomic_max", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_image_atomic_and", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if 
(!strcmp("__intrinsic_image_atomic_or", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_image_atomic_xor", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_image_atomic_exchange", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee)) + opcode = TGSI_OPCODE_ATOMCAS; + else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); + if (opcode == TGSI_OPCODE_STORE) + inst->dst[0].writemask = WRITEMASK_XYZW; + } + + inst->buffer = image; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; + + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = (type->sampler_array) + ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + inst->tex_target = TEXTURE_BUFFER_INDEX; + break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; + case GLSL_SAMPLER_DIM_MS: + inst->tex_target = (type->sampler_array) + ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + break; + default: + assert(!"Should not get here."); + } + + inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), + _mesa_get_shader_image_format(imgvar->data.image_format)); +} + +void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; @@ -3381,6 +3625,36 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) return; } + if (!strcmp("__intrinsic_load_shared", callee) || + !strcmp("__intrinsic_store_shared", callee) || + !strcmp("__intrinsic_atomic_add_shared", callee) || + !strcmp("__intrinsic_atomic_min_shared", callee) || + !strcmp("__intrinsic_atomic_max_shared", callee) || + !strcmp("__intrinsic_atomic_and_shared", callee) || + !strcmp("__intrinsic_atomic_or_shared", callee) || + !strcmp("__intrinsic_atomic_xor_shared", callee) || + !strcmp("__intrinsic_atomic_exchange_shared", callee) || + !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + visit_shared_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_image_load", callee) || + !strcmp("__intrinsic_image_store", callee) || + !strcmp("__intrinsic_image_atomic_add", callee) || + !strcmp("__intrinsic_image_atomic_min", callee) || + !strcmp("__intrinsic_image_atomic_max", callee) || + !strcmp("__intrinsic_image_atomic_and", callee) || + !strcmp("__intrinsic_image_atomic_or", callee) || + !strcmp("__intrinsic_image_atomic_xor", callee) || + !strcmp("__intrinsic_image_atomic_exchange", callee) || + !strcmp("__intrinsic_image_atomic_comp_swap", callee) || + !strcmp("__intrinsic_image_size", callee) || + !strcmp("__intrinsic_image_samples", callee)) { + visit_image_intrinsic(ir); + return; + } + entry = get_function_signature(sig); /* Process in parameters. 
*/ foreach_two_lists(formal_node, &sig->parameters, @@ -3980,6 +4254,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; samplers_used = 0; buffers_used = 0; + images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; glsl_version = 0; @@ -3992,6 +4267,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() options = NULL; have_sqrt = false; have_fma = false; + use_shared_memory = false; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -4015,6 +4291,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; v->buffers_used = 0; + v->images_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (inst->info->is_tex) { @@ -4035,8 +4312,20 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (inst->buffer.file != PROGRAM_UNDEFINED && ( is_resource_instruction(inst->op) || inst->op == TGSI_OPCODE_STORE)) { - if (inst->buffer.file == PROGRAM_BUFFER) + if (inst->buffer.file == PROGRAM_BUFFER) { v->buffers_used |= 1 << inst->buffer.index; + } else if (inst->buffer.file == PROGRAM_MEMORY) { + v->use_shared_memory = true; + } else { + assert(inst->buffer.file == PROGRAM_IMAGE); + for (int i = 0; i < inst->sampler_array_size; i++) { + unsigned idx = inst->sampler_base + i; + v->images_used |= 1 << idx; + v->image_targets[idx] = + st_translate_texture_target(inst->tex_target, false); + v->image_formats[idx] = inst->image_format; + } + } } } prog->SamplersUsed = v->samplers_used; @@ -4819,7 +5108,9 @@ struct st_translate { struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; + struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + struct ureg_src shared_memory; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; struct array_decl *input_arrays; @@ -4880,6 +5171,12 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 
TGSI_SEMANTIC_PRIMID, TGSI_SEMANTIC_TESSOUTER, TGSI_SEMANTIC_TESSINNER, + + /* Compute shaders + */ + TGSI_SEMANTIC_THREAD_ID, + TGSI_SEMANTIC_BLOCK_ID, + TGSI_SEMANTIC_GRID_SIZE, }; /** @@ -5308,7 +5605,12 @@ compile_tgsi_instruction(struct st_translate *t, for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; num_src++; - src[0] = t->buffers[inst->buffer.index]; + if (inst->buffer.file == PROGRAM_MEMORY) + src[0] = t->shared_memory; + else if (inst->buffer.file == PROGRAM_BUFFER) + src[0] = t->buffers[inst->buffer.index]; + else + src[0] = t->images[inst->buffer.index]; if (inst->buffer.reladdr) src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); assert(src[0].File != TGSI_FILE_NULL); @@ -5317,7 +5619,13 @@ compile_tgsi_instruction(struct st_translate *t, break; case TGSI_OPCODE_STORE: - dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask); + if (inst->buffer.file == PROGRAM_MEMORY) + dst[0] = ureg_dst(t->shared_memory); + else if (inst->buffer.file == PROGRAM_BUFFER) + dst[0] = ureg_dst(t->buffers[inst->buffer.index]); + else + dst[0] = ureg_dst(t->images[inst->buffer.index]); + dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); if (inst->buffer.reladdr) dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2])); assert(dst[0].File != TGSI_FILE_NULL); @@ -5643,6 +5951,12 @@ st_translate_program( TGSI_SEMANTIC_TESSCOORD); assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] == TGSI_SEMANTIC_HELPER_INVOCATION); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] == + TGSI_SEMANTIC_THREAD_ID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] == + TGSI_SEMANTIC_BLOCK_ID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] == + TGSI_SEMANTIC_GRID_SIZE); t = CALLOC_STRUCT(st_translate); if (!t) { @@ -5710,6 +6024,8 @@ st_translate_program( t->inputs[i] = ureg_DECL_vs_input(ureg, i); } break; + case TGSI_PROCESSOR_COMPUTE: + break; default: assert(0); } @@ 
-5719,6 +6035,7 @@ st_translate_program( */ switch (procType) { case TGSI_PROCESSOR_FRAGMENT: + case TGSI_PROCESSOR_COMPUTE: break; case TGSI_PROCESSOR_GEOMETRY: case TGSI_PROCESSOR_TESS_EVAL: @@ -5969,7 +6286,17 @@ st_translate_program( } } + if (program->use_shared_memory) + t->shared_memory = ureg_DECL_shared_memory(ureg); + for (i = 0; i < program->shader->NumImages; i++) { + if (program->images_used & (1 << i)) { + t->images[i] = ureg_DECL_image(ureg, i, + program->image_targets[i], + program->image_formats[i], + true, false); + } + } /* Emit each instruction in turn: */ @@ -6188,6 +6515,7 @@ get_mesa_program(struct gl_context *ctx, struct st_geometry_program *stgp; struct st_tessctrl_program *sttcp; struct st_tesseval_program *sttep; + struct st_compute_program *stcp; switch (shader->Type) { case GL_VERTEX_SHADER: @@ -6210,6 +6538,10 @@ get_mesa_program(struct gl_context *ctx, sttep = (struct st_tesseval_program *)prog; sttep->glsl_to_tgsi = v; break; + case GL_COMPUTE_SHADER: + stcp = (struct st_compute_program *)prog; + stcp->glsl_to_tgsi = v; + break; default: assert(!"should not be reached"); return NULL; diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 84b65369d80..a983d64b5d5 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -906,7 +906,6 @@ st_manager_add_color_renderbuffer(struct st_context *st, break; default: return FALSE; - break; } if (!st_framebuffer_add_renderbuffer(stfb, idx)) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 5bd626f8610..2e21d02b8b5 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -158,6 +158,9 @@ delete_basic_variant(struct st_context *st, struct st_basic_variant *v, case GL_GEOMETRY_PROGRAM_NV: cso_delete_geometry_shader(st->cso_context, v->driver_shader); break; + case GL_COMPUTE_PROGRAM_NV: + cso_delete_compute_shader(st->cso_context, 
v->driver_shader); + break; default: assert(!"this shouldn't occur"); } @@ -193,6 +196,30 @@ st_release_basic_variants(struct st_context *st, GLenum target, /** + * Free all variants of a compute program. + */ +void +st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp) +{ + struct st_basic_variant **variants = &stcp->variants; + struct st_basic_variant *v; + + for (v = *variants; v; ) { + struct st_basic_variant *next = v->next; + delete_basic_variant(st, v, stcp->Base.Base.Target); + v = next; + } + + *variants = NULL; + + if (stcp->tgsi.prog) { + ureg_free_tokens(stcp->tgsi.prog); + stcp->tgsi.prog = NULL; + } +} + + +/** * Translate a vertex program. */ bool @@ -1395,6 +1422,74 @@ st_translate_tesseval_program(struct st_context *st, /** + * Translate a compute program to create a new variant. + */ +bool +st_translate_compute_program(struct st_context *st, + struct st_compute_program *stcp) +{ + struct ureg_program *ureg; + struct pipe_shader_state prog; + + ureg = ureg_create_with_screen(TGSI_PROCESSOR_COMPUTE, st->pipe->screen); + if (ureg == NULL) + return false; + + st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg, + TGSI_PROCESSOR_COMPUTE, &prog); + + stcp->tgsi.prog = prog.tokens; + stcp->tgsi.req_local_mem = stcp->Base.SharedSize; + stcp->tgsi.req_private_mem = 0; + stcp->tgsi.req_input_mem = 0; + + free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi); + stcp->glsl_to_tgsi = NULL; + return true; +} + + +/** + * Get/create compute program variant. + */ +struct st_basic_variant * +st_get_cp_variant(struct st_context *st, + struct pipe_compute_state *tgsi, + struct st_basic_variant **variants) +{ + struct pipe_context *pipe = st->pipe; + struct st_basic_variant *v; + struct st_basic_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st->has_shareable_shaders ? 
NULL : st; + + /* Search for existing variant */ + for (v = *variants; v; v = v->next) { + if (memcmp(&v->key, &key, sizeof(key)) == 0) { + break; + } + } + + if (!v) { + /* create new */ + v = CALLOC_STRUCT(st_basic_variant); + if (v) { + /* fill in new variant */ + v->driver_shader = pipe->create_compute_state(pipe, tgsi); + v->key = key; + + /* insert into list */ + v->next = *variants; + *variants = v; + } + } + + return v; +} + + +/** * Vert/Geom/Frag programs have per-context variants. Free all the * variants attached to the given program which match the given context. */ @@ -1449,14 +1544,17 @@ destroy_program_variants(struct st_context *st, struct gl_program *target) case GL_GEOMETRY_PROGRAM_NV: case GL_TESS_CONTROL_PROGRAM_NV: case GL_TESS_EVALUATION_PROGRAM_NV: + case GL_COMPUTE_PROGRAM_NV: { struct st_geometry_program *gp = (struct st_geometry_program*)target; struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target; struct st_tesseval_program *tep = (struct st_tesseval_program*)target; + struct st_compute_program *cp = (struct st_compute_program*)target; struct st_basic_variant **variants = target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants : target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants : target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants : + target->Target == GL_COMPUTE_PROGRAM_NV ? 
&cp->variants : NULL; struct st_basic_variant *v, **prevPtr = variants; @@ -1513,6 +1611,7 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData) case GL_GEOMETRY_SHADER: case GL_TESS_CONTROL_SHADER: case GL_TESS_EVALUATION_SHADER: + case GL_COMPUTE_SHADER: { destroy_program_variants(st, shader->Program); } @@ -1629,6 +1728,12 @@ st_precompile_shader_variant(struct st_context *st, break; } + case GL_COMPUTE_PROGRAM_NV: { + struct st_compute_program *p = (struct st_compute_program *)prog; + st_get_cp_variant(st, &p->tgsi, &p->variants); + break; + } + default: assert(0); } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 74f3def6095..028fba99a74 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -231,6 +231,18 @@ struct st_tesseval_program }; +/** + * Derived from Mesa gl_compute_program: + */ +struct st_compute_program +{ + struct gl_compute_program Base; /**< The Mesa compute program */ + struct pipe_compute_state tgsi; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + struct st_basic_variant *variants; +}; + static inline struct st_fragment_program * st_fragment_program( struct gl_fragment_program *fp ) @@ -263,6 +275,12 @@ st_tesseval_program( struct gl_tess_eval_program *tep ) return (struct st_tesseval_program *)tep; } +static inline struct st_compute_program * +st_compute_program( struct gl_compute_program *cp ) +{ + return (struct st_compute_program *)cp; +} + static inline void st_reference_vertprog(struct st_context *st, struct st_vertex_program **ptr, @@ -313,6 +331,16 @@ st_reference_tesseprog(struct st_context *st, (struct gl_program *) prog); } +static inline void +st_reference_compprog(struct st_context *st, + struct st_compute_program **ptr, + struct st_compute_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + /** * This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC 
slots. */ @@ -351,6 +379,11 @@ st_get_fp_variant(struct st_context *st, const struct st_fp_variant_key *key); extern struct st_basic_variant * +st_get_cp_variant(struct st_context *st, + struct pipe_compute_state *tgsi, + struct st_basic_variant **variants); + +extern struct st_basic_variant * st_get_basic_variant(struct st_context *st, unsigned pipe_shader, struct pipe_shader_state *tgsi, @@ -365,6 +398,10 @@ st_release_fp_variants( struct st_context *st, struct st_fragment_program *stfp ); extern void +st_release_cp_variants(struct st_context *st, + struct st_compute_program *stcp); + +extern void st_release_basic_variants(struct st_context *st, GLenum target, struct st_basic_variant **variants, struct pipe_shader_state *tgsi); @@ -392,6 +429,10 @@ extern bool st_translate_tesseval_program(struct st_context *st, struct st_tesseval_program *sttep); +extern bool +st_translate_compute_program(struct st_context *st, + struct st_compute_program *stcp); + extern void st_print_current_vertex_program(void); diff --git a/src/util/hash_table.h b/src/util/hash_table.h index 85b013cac24..c69abfa3e64 100644 --- a/src/util/hash_table.h +++ b/src/util/hash_table.h @@ -108,7 +108,9 @@ static inline uint32_t _mesa_hash_pointer(const void *pointer) return _mesa_hash_data(&pointer, sizeof(pointer)); } -static const uint32_t _mesa_fnv32_1a_offset_bias = 2166136261u; +enum { + _mesa_fnv32_1a_offset_bias = 2166136261u, +}; static inline uint32_t _mesa_fnv32_1a_accumulate_block(uint32_t hash, const void *data, size_t size) diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h index e38395ac633..867590391f4 100644 --- a/src/util/u_atomic.h +++ b/src/util/u_atomic.h @@ -88,65 +88,6 @@ #include <intrin.h> #include <assert.h> -#if _MSC_VER < 1600 - -/* Implement _InterlockedCompareExchange8 in terms of _InterlockedCompareExchange16 */ -static __inline char -_InterlockedCompareExchange8(char volatile *destination8, char exchange8, char comparand8) -{ - INT_PTR destinationAddr = 
(INT_PTR)destination8; - short volatile *destination16 = (short volatile *)(destinationAddr & ~1); - const short shift8 = (destinationAddr & 1) * 8; - const short mask8 = 0xff << shift8; - short initial16 = *destination16; - char initial8 = initial16 >> shift8; - while (initial8 == comparand8) { - /* initial *destination8 matches, so try exchange it while keeping the - * neighboring byte untouched */ - short exchange16 = (initial16 & ~mask8) | ((short)exchange8 << shift8); - short comparand16 = initial16; - short initial16 = _InterlockedCompareExchange16(destination16, exchange16, comparand16); - if (initial16 == comparand16) { - /* succeeded */ - return comparand8; - } - /* something changed, retry with the new initial value */ - initial8 = initial16 >> shift8; - } - return initial8; -} - -/* Implement _InterlockedExchangeAdd16 in terms of _InterlockedCompareExchange16 */ -static __inline short -_InterlockedExchangeAdd16(short volatile *addend, short value) -{ - short initial = *addend; - short comparand; - do { - short exchange = initial + value; - comparand = initial; - /* if *addend==comparand then *addend=exchange, return original *addend */ - initial = _InterlockedCompareExchange16(addend, exchange, comparand); - } while(initial != comparand); - return comparand; -} - -/* Implement _InterlockedExchangeAdd8 in terms of _InterlockedCompareExchange8 */ -static __inline char -_InterlockedExchangeAdd8(char volatile *addend, char value) -{ - char initial = *addend; - char comparand; - do { - char exchange = initial + value; - comparand = initial; - initial = _InterlockedCompareExchange8(addend, exchange, comparand); - } while(initial != comparand); - return comparand; -} - -#endif /* _MSC_VER < 1600 */ - /* MSVC supports decltype keyword, but it's only supported on C++ and doesn't * quite work here; and if a C++-only solution is worthwhile, then it would be * better to use templates / function overloading, instead of decltype magic. |