diff options
author | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800 |
commit | b00e3f221b3f6dd0e87697c53331fd033b6e8676 (patch) | |
tree | a59dfeca8fd404c65da59a663e0abda301e893a2 | |
parent | a1e7b8701a4687f29b013364a852aa773c80f960 (diff) | |
parent | 5d4b019d2a6d4deb4db11780618515cf1fa8a4fc (diff) |
Merge remote-tracking branch 'mesa-public/master' into vulkan
340 files changed, 16617 insertions, 3727 deletions
diff --git a/configure.ac b/configure.ac index 0c88db9f66f..758751c4b94 100644 --- a/configure.ac +++ b/configure.ac @@ -81,7 +81,7 @@ PRESENTPROTO_REQUIRED=1.0 LIBUDEV_REQUIRED=151 GLPROTO_REQUIRED=1.4.14 LIBOMXIL_BELLAGIO_REQUIRED=0.0 -LIBVA_REQUIRED=0.35.0 +LIBVA_REQUIRED=0.38.0 VDPAU_REQUIRED=1.1 WAYLAND_REQUIRED=1.2.0 XCB_REQUIRED=1.9.3 @@ -867,7 +867,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast" AC_ARG_WITH([gallium-drivers], [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@], [comma delimited Gallium drivers list, e.g. - "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4" + "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl" @<:@default=r300,r600,svga,swrast@:>@])], [with_gallium_drivers="$withval"], [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"]) @@ -2188,6 +2188,12 @@ if test -n "$with_gallium_drivers"; then PKG_CHECK_MODULES([SIMPENROSE], [simpenrose], [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no]) ;; + xvirgl) + HAVE_GALLIUM_VIRGL=yes + gallium_require_drm "virgl" + gallium_require_drm_loader + require_egl_drm "virgl" + ;; *) AC_MSG_ERROR([Unknown Gallium driver: $driver]) ;; @@ -2259,6 +2265,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes) +AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno) @@ -2386,6 +2393,7 @@ AC_CONFIG_FILES([Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/vc4/Makefile + src/gallium/drivers/virgl/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile src/gallium/state_trackers/glx/xlib/Makefile @@ -2426,6 +2434,8 @@ AC_CONFIG_FILES([Makefile 
src/gallium/winsys/sw/wrapper/Makefile src/gallium/winsys/sw/xlib/Makefile src/gallium/winsys/vc4/drm/Makefile + src/gallium/winsys/virgl/drm/Makefile + src/gallium/winsys/virgl/vtest/Makefile src/gbm/Makefile src/gbm/main/gbm.pc src/glsl/Makefile diff --git a/docs/GL3.txt b/docs/GL3.txt index 167321676df..7f6b8c9ef27 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -153,10 +153,10 @@ GL 4.3, GLSL 4.30: GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30) GL_ARB_clear_buffer_object DONE (all drivers) GL_ARB_compute_shader in progress (jljusten) - GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware) + GL_ARB_copy_image DONE (i965, nv50, nvc0, radeonsi) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) - GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe) + GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_internalformat_query2 not started GL_ARB_invalidate_subdata DONE (all drivers) @@ -243,7 +243,7 @@ GLES3.2, GLSL ES 3.2 GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+) GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers) GL_OES_draw_buffers_indexed not started - GL_OES_draw_elements_base_vertex not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers) + GL_OES_draw_elements_base_vertex DONE (all drivers) GL_OES_geometry_shader not started (based on GL_ARB_geometry_shader4, which is done for all drivers) GL_OES_gpu_shader5 not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers) GL_OES_primitive_bounding box not started diff --git a/docs/index.html b/docs/index.html index 138447fc500..c8d4a5c2699 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@ <h1>News</h1> +<h2>October 24, 2015</h2> +<p> +<a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released. 
+This is a bug-fix release. +</p> + <h2>October 10, 2015</h2> <p> <a href="relnotes/11.0.3.html">Mesa 11.0.3</a> is released. @@ -28,7 +34,7 @@ This is a bug-fix release. This is a bug-fix release. <br> NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6 -series. Users of 10.5 are encouraged to migrate to the 11.0 series in order +series. Users of 10.6 are encouraged to migrate to the 11.0 series in order to obtain future fixes. </p> diff --git a/docs/relnotes.html b/docs/relnotes.html index 074c3b6a612..d1dde4fd726 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release. </p> <ul> +<li><a href="relnotes/11.0.4.html">11.0.4 release notes</a> <li><a href="relnotes/11.0.3.html">11.0.3 release notes</a> <li><a href="relnotes/10.6.9.html">10.6.9 release notes</a> <li><a href="relnotes/11.0.2.html">11.0.2 release notes</a> diff --git a/docs/relnotes/11.0.4.html b/docs/relnotes/11.0.4.html new file mode 100644 index 00000000000..a777b9de506 --- /dev/null +++ b/docs/relnotes/11.0.4.html @@ -0,0 +1,168 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1> + +<p> +Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release. +</p> +<p> +Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. 
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +ed412ca6a46d1bd055120e5c12806c15419ae8c4dd6d3f6ea20a83091d5c78bf mesa-11.0.4.tar.gz +40201bf7fc6fa12a6d9edfe870b41eb4dd6669154e3c42c48a96f70805f5483d mesa-11.0.4.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li> + +</ul> + + +<h2>Changes</h2> + +<p>Alejandro Piñeiro (2):</p> +<ul> + <li>i965/vec4: check writemask when bailing out at register coalesce</li> + <li>i965/vec4: fill src_reg type using the constructor type parameter</li> +</ul> + +<p>Brian Paul (2):</p> +<ul> + <li>vbo: fix incorrect switch statement in init_mat_currval()</li> + <li>mesa: fix incorrect opcode in save_BlendFunci()</li> +</ul> + +<p>Chih-Wei Huang (3):</p> +<ul> + <li>mesa: android: Fix the incorrect path of sse_minmax.c</li> + <li>nv50/ir: use C++11 standard std::unordered_map if possible</li> + <li>nv30: include the header of ffs prototype</li> +</ul> + +<p>Chris Wilson (1):</p> +<ul> + <li>i965: Remove early release of DRI2 miptree</li> +</ul> + +<p>Dave Airlie 
(1):</p> +<ul> + <li>mesa/uniforms: fix get_uniform for doubles (v2)</li> +</ul> + +<p>Emil Velikov (1):</p> +<ul> + <li>docs: add sha256 checksums for 11.0.3</li> +</ul> + +<p>Francisco Jerez (5):</p> +<ul> + <li>i965: Don't tell the hardware about our UAV access.</li> + <li>mesa: Expose function to calculate whether a shader image unit is valid.</li> + <li>mesa: Skip redundant texture completeness checking during image validation.</li> + <li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li> + <li>mesa: Get rid of texture-dependent image unit derived state.</li> +</ul> + +<p>Ian Romanick (8):</p> +<ul> + <li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li> + <li>ff_fragment_shader: Use binding to set the sampler unit</li> + <li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li> + <li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li> + <li>glsl: Only set ir_variable::constant_value for const-decorated variables</li> + <li>glsl: Restrict initializers for global variables to constant expression in ES</li> + <li>glsl: Add method to determine whether an expression contains the sequence operator</li> + <li>glsl: In later GLSL versions, sequence operator is cannot be a constant expression</li> +</ul> + +<p>Ilia Mirkin (1):</p> +<ul> + <li>nouveau: make sure there's always room to emit a fence</li> +</ul> + +<p>Indrajit Das (1):</p> +<ul> + <li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li> +</ul> + +<p>Jonathan Gray (1):</p> +<ul> + <li>configure.ac: ensure RM is set</li> +</ul> + +<p>Krzysztof Sobiecki (1):</p> +<ul> + <li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li> +</ul> + +<p>Leo Liu (1):</p> +<ul> + <li>st/omx/dec/h264: fix field picture type 0 poc disorder</li> +</ul> + +<p>Marek Olšák (3):</p> +<ul> + <li>st/mesa: fix clip state 
dependencies</li> + <li>radeonsi: fix a GS copy shader leak</li> + <li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li> +</ul> + +<p>Nicolai Hähnle (1):</p> +<ul> + <li>u_vbuf: fix vb slot assignment for translated buffers</li> +</ul> + +<p>Rob Clark (1):</p> +<ul> + <li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li> +</ul> + +<p>Tapani Pälli (3):</p> +<ul> + <li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li> + <li>mesa: Set api prefix to version string when overriding version</li> + <li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li> +</ul> + + +</div> +</body> +</html> diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index d3dbe9dda13..7160244fcb4 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -45,15 +45,21 @@ Note: some of the new features are only available with certain drivers. <ul> <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li> +<li>GL_ARB_copy_image on radeonsi</li> <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li> <li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li> +<li>GL_ARB_shader_clock on i965 (gen7+)</li> +<li>GL_ARB_shader_stencil_export on i965 (gen9+)</li> <li>GL_ARB_shader_storage_buffer_object on i965</li> <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li> <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li> <li>GL_ARB_texture_query_lod on softpipe</li> <li>GL_ARB_texture_view on radeonsi</li> +<li>GL_EXT_draw_elements_base_vertex on all drivers</li> +<li>GL_OES_draw_elements_base_vertex on all drivers</li> <li>EGL_KHR_create_context on softpipe, llvmpipe</li> <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li> +<li>new virgl gallium driver for qemu virtio-gpu</li> </ul> <h2>Bug fixes</h2> diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index a0f155a1f42..6bbd3fa87f5 100644 --- a/include/GL/internal/dri_interface.h +++ 
b/include/GL/internal/dri_interface.h @@ -495,7 +495,7 @@ struct __DRIdamageExtensionRec { * SWRast Loader extension. */ #define __DRI_SWRAST_LOADER "DRI_SWRastLoader" -#define __DRI_SWRAST_LOADER_VERSION 2 +#define __DRI_SWRAST_LOADER_VERSION 3 struct __DRIswrastLoaderExtensionRec { __DRIextension base; @@ -528,6 +528,15 @@ struct __DRIswrastLoaderExtensionRec { void (*putImage2)(__DRIdrawable *drawable, int op, int x, int y, int width, int height, int stride, char *data, void *loaderPrivate); + + /** + * Put image to drawable + * + * \since 3 + */ + void (*getImage2)(__DRIdrawable *readable, + int x, int y, int width, int height, int stride, + char *data, void *loaderPrivate); }; /** diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 8a425999429..5891ba67ea4 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -109,21 +109,29 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)") CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)") CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3") CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3") -CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT GT1") -CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1") -CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1") -CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1") -CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1") -CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT GT2") -CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2") -CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2") -CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2") -CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2") -CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2") -CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F") -CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3") -CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3") -CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3") +CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 
510 (Skylake GT1)") +CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") +CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1") +CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1") +CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)") +CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f") +CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f") +CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)") +CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f") +CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2") +CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)") +CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)") +CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)") +CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2") +CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)") +CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)") +CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)") +CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)") +CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4") CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index 52eada1d3d5..bcf15a186c6 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO) CHIPSET(0x9877, CARRIZO_, CARRIZO) CHIPSET(0x7300, FIJI_, FIJI) + +CHIPSET(0x98E4, STONEY_, STONEY) diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index a7c3606de0a..611d55fafe2 100644 --- a/src/gallium/Makefile.am +++ 
b/src/gallium/Makefile.am @@ -82,6 +82,11 @@ if HAVE_GALLIUM_VC4 SUBDIRS += drivers/vc4 winsys/vc4/drm endif +## virgl +if HAVE_GALLIUM_VIRGL +SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest +endif + ## the sw winsys' SUBDIRS += winsys/sw/null diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 017d0752060..96aba7370c1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -427,6 +427,7 @@ lp_build_init(void) */ util_cpu_caps.has_avx = 0; util_cpu_caps.has_avx2 = 0; + util_cpu_caps.has_f16c = 0; } #ifdef PIPE_ARCH_PPC_64 @@ -458,7 +459,9 @@ lp_build_init(void) util_cpu_caps.has_sse3 = 0; util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; + util_cpu_caps.has_sse4_2 = 0; util_cpu_caps.has_avx = 0; + util_cpu_caps.has_avx2 = 0; util_cpu_caps.has_f16c = 0; #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 72fab8ccf06..7bda1184ee9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -497,20 +497,48 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #endif } - llvm::SmallVector<std::string, 1> MAttrs; - if (util_cpu_caps.has_avx) { - /* - * AVX feature is not automatically detected from CPUID by the X86 target - * yet, because the old (yet default) JIT engine is not capable of - * emitting the opcodes. On newer llvm versions it is and at least some - * versions (tested with 3.3) will emit avx opcodes without this anyway. - */ - MAttrs.push_back("+avx"); - if (util_cpu_caps.has_f16c) { - MAttrs.push_back("+f16c"); - } - builder.setMAttrs(MAttrs); - } + llvm::SmallVector<std::string, 16> MAttrs; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * We need to unset attributes because sometimes LLVM mistakenly assumes + * certain features are present given the processor name. 
+ * + * https://bugs.freedesktop.org/show_bug.cgi?id=92214 + * http://llvm.org/PR25021 + * http://llvm.org/PR19429 + * http://llvm.org/PR16721 + */ + MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" ); + MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" ); + MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" ); + MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" ); +#if HAVE_LLVM >= 0x0304 + MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1"); +#else + MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41" : "-sse41" ); +#endif +#if HAVE_LLVM >= 0x0304 + MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2"); +#else + MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42" : "-sse42" ); +#endif + /* + * AVX feature is not automatically detected from CPUID by the X86 target + * yet, because the old (yet default) JIT engine is not capable of + * emitting the opcodes. On newer llvm versions it is and at least some + * versions (tested with 3.3) will emit avx opcodes without this anyway. + */ + MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx"); + MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c"); + MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2"); +#endif + +#if defined(PIPE_ARCH_PPC) + MAttrs.push_back(util_cpu_caps.has_altivec ? 
"+altivec" : "-altivec"); +#endif + + builder.setMAttrs(MAttrs); #if HAVE_LLVM >= 0x0305 StringRef MCPU = llvm::sys::getHostCPUName(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index b5c06b69571..26bfa0d2677 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -405,16 +405,17 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, break; case PIPE_TEX_WRAP_MIRROR_REPEAT: + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length_f); + coord = lp_build_add(coord_bld, coord, offset); + } /* compute mirror function */ coord = lp_build_coord_mirror(bld, coord); /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); @@ -567,12 +568,13 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* floor */ /* use itrunc instead since we clamp to 0 anyway */ icoord = lp_build_itrunc(coord_bld, coord); - if (offset) { - icoord = lp_build_add(int_coord_bld, icoord, offset); - } /* clamp to [0, length - 1]. */ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, @@ -2586,6 +2588,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; } + /* + * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest, + * so AoS path could be used. 
Not sure it's worth the trouble... + */ min_img_filter = derived_sampler_state.min_img_filter; mag_img_filter = derived_sampler_state.mag_img_filter; diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h index 08271a760f5..6ca4dc8136c 100644 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h @@ -59,6 +59,11 @@ #include "vc4/drm/vc4_drm_public.h" #endif +#if GALLIUM_VIRGL +#include "virgl/drm/virgl_drm_public.h" +#include "virgl/virgl_public.h" +#endif + static char* driver_name = NULL; /* XXX: We need to teardown the winsys if *screen_create() fails. */ @@ -296,6 +301,33 @@ pipe_freedreno_create_screen(int fd) } #endif +#if defined(GALLIUM_VIRGL) +#if defined(DRI_TARGET) + +const __DRIextension **__driDriverGetExtensions_virtio_gpu(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +static struct pipe_screen * +pipe_virgl_create_screen(int fd) +{ + struct virgl_winsys *vws; + struct pipe_screen *screen; + + vws = virgl_drm_winsys_create(fd); + if (!vws) + return NULL; + + screen = virgl_create_screen(vws); + return screen ? 
debug_screen_wrap(screen) : NULL; +} +#endif + #if defined(GALLIUM_VC4) #if defined(DRI_TARGET) @@ -385,6 +417,11 @@ dd_create_screen(int fd) return pipe_freedreno_create_screen(fd); else #endif +#if defined(GALLIUM_VIRGL) + if ((strcmp(driver_name, "virtio_gpu") == 0)) + return pipe_virgl_create_screen(fd); + else +#endif #if defined(GALLIUM_VC4) if (strcmp(driver_name, "vc4") == 0) return pipe_vc4_create_screen(fd); @@ -474,6 +511,11 @@ dd_configuration(enum drm_conf conf) return configuration_query(conf); else #endif +#if defined(GALLIUM_VIRGL) + if ((strcmp(driver_name, "virtio_gpu") == 0)) + return configuration_query(conf); + else +#endif #if defined(GALLIUM_VC4) if (strcmp(driver_name, "vc4") == 0) return configuration_query(conf); diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 5f46552f6c3..f3693fb1f39 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -19,6 +19,10 @@ #include "llvmpipe/lp_public.h" #endif +#ifdef GALLIUM_VIRGL +#include "virgl/virgl_public.h" +#include "virgl/vtest/virgl_vtest_public.h" +#endif static inline struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) @@ -30,6 +34,14 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = llvmpipe_create_screen(winsys); #endif +#if defined(GALLIUM_VIRGL) + if (screen == NULL && strcmp(driver, "virpipe") == 0) { + struct virgl_winsys *vws; + vws = virgl_vtest_winsys_wrap(winsys); + screen = virgl_create_screen(vws); + } +#endif + #if defined(GALLIUM_SOFTPIPE) if (screen == NULL) screen = softpipe_create_screen(winsys); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 5d80cca5b0e..e29ffb39894 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -29,6 +29,7 @@ #include 
"util/u_string.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "tgsi_dump.h" #include "tgsi_info.h" #include "tgsi_iterate.h" @@ -43,6 +44,8 @@ struct dump_ctx { struct tgsi_iterate_context iter; + boolean dump_float_as_hex; + uint instno; uint immno; int indent; @@ -88,6 +91,7 @@ dump_enum( #define SID(I) ctx->dump_printf( ctx, "%d", I ) #define FLT(F) ctx->dump_printf( ctx, "%10.4f", F ) #define DBL(D) ctx->dump_printf( ctx, "%10.8f", D ) +#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) ) #define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) const char * @@ -251,7 +255,10 @@ dump_imm_data(struct tgsi_iterate_context *iter, break; } case TGSI_IMM_FLOAT32: - FLT( data[i].Float ); + if (ctx->dump_float_as_hex) + HFLT( data[i].Float ); + else + FLT( data[i].Float ); break; case TGSI_IMM_UINT32: UID(data[i].Uint); @@ -682,6 +689,11 @@ tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file) ctx.indentation = 0; ctx.file = file; + if (flags & TGSI_DUMP_FLOAT_AS_HEX) + ctx.dump_float_as_hex = TRUE; + else + ctx.dump_float_as_hex = FALSE; + tgsi_iterate_shader( tokens, &ctx.iter ); } @@ -697,6 +709,7 @@ struct str_dump_ctx char *str; char *ptr; int left; + bool nospace; }; static void @@ -719,10 +732,11 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) 
sctx->ptr += written; sctx->left -= written; } - } + } else + sctx->nospace = true; } -void +bool tgsi_dump_str( const struct tgsi_token *tokens, uint flags, @@ -749,8 +763,16 @@ tgsi_dump_str( ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; + ctx.nospace = false; + + if (flags & TGSI_DUMP_FLOAT_AS_HEX) + ctx.base.dump_float_as_hex = TRUE; + else + ctx.base.dump_float_as_hex = FALSE; tgsi_iterate_shader( tokens, &ctx.base.iter ); + + return !ctx.nospace; } void @@ -773,6 +795,7 @@ tgsi_dump_instruction_str( ctx.str[0] = 0; ctx.ptr = str; ctx.left = (int)size; + ctx.nospace = false; iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index 7c8f92ee7bc..c3722d333d7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -38,7 +38,9 @@ extern "C" { #endif -void +#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0) + +bool tgsi_dump_str( const struct tgsi_token *tokens, uint flags, diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 3e3ed5b19d1..4a82c9b3552 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -195,8 +195,15 @@ static boolean parse_float( const char **pcur, float *val ) boolean integral_part = FALSE; boolean fractional_part = FALSE; - *val = (float) atof( cur ); + if (*cur == '0' && *(cur + 1) == 'x') { + union fi fi; + fi.ui = strtoul(cur, NULL, 16); + *val = fi.f; + cur += 10; + goto out; + } + *val = (float) atof( cur ); if (*cur == '-' || *cur == '+') cur++; if (is_digit( cur )) { @@ -228,6 +235,8 @@ static boolean parse_float( const char **pcur, float *val ) else return FALSE; } + +out: *pcur = cur; return TRUE; } diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c index c1ce408119f..79630bf6dc3 100644 --- a/src/gallium/auxiliary/util/u_format.c +++ 
b/src/gallium/auxiliary/util/u_format.c @@ -170,6 +170,25 @@ util_format_is_snorm(enum pipe_format format) } boolean +util_format_is_snorm8(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + int i; + + if (desc->is_mixed) + return FALSE; + + i = util_format_get_first_non_void_channel(format); + if (i == -1) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && + !desc->channel[i].pure_integer && + desc->channel[i].normalized && + desc->channel[i].size == 8; +} + +boolean util_format_is_luminance_alpha(enum pipe_format format) { const struct util_format_description *desc = diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 42b39ff04fd..a1b1b28fa41 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -686,6 +686,9 @@ util_format_is_pure_uint(enum pipe_format format); boolean util_format_is_snorm(enum pipe_format format); +boolean +util_format_is_snorm8(enum pipe_format format); + /** * Check if the src format can be blitted to the destination format with * a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not diff --git a/src/gallium/auxiliary/util/u_tests.c b/src/gallium/auxiliary/util/u_tests.c index a94e5cc2949..006dfa95af2 100644 --- a/src/gallium/auxiliary/util/u_tests.c +++ b/src/gallium/auxiliary/util/u_tests.c @@ -450,6 +450,43 @@ null_constant_buffer(struct pipe_context *ctx) util_report_result(pass); } +static void +null_fragment_shader(struct pipe_context *ctx) +{ + struct cso_context *cso; + struct pipe_resource *cb; + void *vs; + struct pipe_rasterizer_state rs = {0}; + struct pipe_query *query; + union pipe_query_result qresult; + + cso = cso_create_context(ctx); + cb = util_create_texture2d(ctx->screen, 256, 256, + PIPE_FORMAT_R8G8B8A8_UNORM); + util_set_common_states_and_clear(cso, ctx, cb); + + /* No rasterization. 
*/ + rs.rasterizer_discard = 1; + cso_set_rasterizer(cso, &rs); + + vs = util_set_passthrough_vertex_shader(cso, ctx, false); + + query = ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0); + ctx->begin_query(ctx, query); + util_draw_fullscreen_quad(cso); + ctx->end_query(ctx, query); + ctx->get_query_result(ctx, query, true, &qresult); + + /* Cleanup. */ + cso_destroy_context(cso); + ctx->delete_vs_state(ctx, vs); + ctx->destroy_query(ctx, query); + pipe_resource_reference(&cb, NULL); + + /* Check PRIMITIVES_GENERATED. */ + util_report_result(qresult.u64 == 2); +} + /** * Run all tests. This should be run with a clean context after * context_create. @@ -459,6 +496,7 @@ util_run_tests(struct pipe_screen *screen) { struct pipe_context *ctx = screen->context_create(screen, NULL, 0); + null_fragment_shader(ctx); tgsi_vs_window_space_position(ctx); null_sampler_view(ctx, TGSI_TEXTURE_2D); null_sampler_view(ctx, TGSI_TEXTURE_BUFFER); diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 151afb2dffe..91fdb43cfbb 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -278,7 +278,9 @@ The integer capabilities: in the shader. * ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any pipe_context. - +* ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``: + Whether copying between compressed and plain formats is supported where + a compressed block is copied to/from a plain pixel of the same size. .. _pipe_capf: diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index a9498835011..3906c9b996e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? 
/* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); - if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && + if (ctx->rasterizer->point_size_per_vertex && (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; @@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .key = { /* do binning pass first: */ .binning_pass = true, - .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, + .color_two_side = ctx->rasterizer->light_twoside, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), @@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd3_ctx->fsaturate_t, .fsaturate_r = fd3_ctx->fsaturate_r, }, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, - .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0, - .sprite_coord_mode = ctx->rasterizer ? 
ctx->rasterizer->sprite_coord_mode : false, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 411f5b76329..8f9c8b0623c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->prog.dirty = 0; } - if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) { + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend); uint32_t i; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 025753c037e..7bd5163529a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .key = { /* do binning pass first: */ .binning_pass = true, - .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, + .color_two_side = ctx->rasterizer->light_twoside, + .rasterflat = ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), - .ucp_enables = ctx->rasterizer ? 
ctx->rasterizer->clip_plane_enable : 0, + .ucp_enables = ctx->rasterizer->clip_plane_enable, .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, @@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, }, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, - .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false, - .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index c7ed1d2e379..cf5dd7b0f17 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->prog.dirty = 0; } - if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { + if ((dirty & FD_DIRTY_BLEND)) { struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend); uint32_t i; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 50d140fe903..9f8c33263fb 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 
5812af626cb..2d2fd375656 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 5efe9da2d22..2e9470e66e9 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -202,14 +202,16 @@ static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); dw[1] = kernel_offset; @@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw[6] = rs->wm[2] | ps->ps[4]; dw[7] = 0; /* kernel 1 */ dw[8] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 2, scratch_bo, + ps->ps[0], 0); + } } static inline void @@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 8; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 
3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder, dw[5] = ps->ps[5]; dw[6] = 0; /* kernel 1 */ dw[7] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ps->ps[3], 0); + } } static inline void gen8_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 12; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, dw[9] = 0; dw[10] = 0; /* kernel 2 */ dw[11] = 0; + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ps->ps[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 6e94fb25f1f..3a448719c15 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen6_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder, dw[3] = vs->vs[1]; dw[4] = vs->vs[2]; dw[5] = vs->vs[3]; + + if 
(ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + vs->vs[1], 0); + } } static inline void gen8_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder, dw[6] = vs->vs[2]; dw[7] = vs->vs[3]; dw[8] = vs->vs[4]; + + if (ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + vs->vs[1], 0); + } } static inline void gen7_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see hs_set_gen7_3DSTATE_HS() */ @@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder, dw[4] = hs->hs[2]; dw[5] = hs->hs[3]; dw[6] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc(builder, pos + 4, scratch_bo, + hs->hs[2], 0); + } } static inline void gen8_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see 
hs_set_gen7_3DSTATE_HS() */ @@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder, dw[6] = 0; dw[7] = hs->hs[3]; dw[8] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo, + hs->hs[2], 0); + } } static inline void @@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder, static inline void gen7_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder, dw[3] = ds->ds[1]; dw[4] = ds->ds[2]; dw[5] = ds->ds[3]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ds->ds[1], 0); + } } static inline void gen8_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder, dw[6] = ds->ds[2]; dw[7] = ds->ds[3]; dw[8] = ds->ds[4]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ds->ds[1], 0); + } } static inline void gen6_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len 
= 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void @@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, static inline void gen7_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = 0; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void gen8_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 10; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder, dw[7] = gs->gs[3]; dw[8] = 0; dw[9] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + 
gs->gs[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c index a5fe5e1a6b0..ba3ff9001e1 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.c +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c @@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev, */ assert(per_thread_read <= 63); - /* From the Haswell PRM, volume 2d, page 199: + /* + * From the Haswell PRM, volume 2d, page 199: * * "(Cross-Thread Constant Data Read Length) [0,127]" */ @@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev, return true; } -static uint8_t -compute_get_gen6_scratch_space(const struct ilo_dev *dev, - const struct ilo_state_compute_info *info) +static uint32_t +compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) { - uint32_t scratch_size = 0; - uint8_t i; + ILO_DEV_ASSERT(dev, 6, 7); - ILO_DEV_ASSERT(dev, 6, 8); + /* + * From the Sandy Bridge PRM, volume 2 part 2, page 30: + * + * "(Per Thread Scratch Space) + * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]" + */ + assert(info->per_thread_scratch_size <= 12 * 1024); - for (i = 0; i < info->interface_count; i++) { - if (scratch_size < info->interfaces[i].scratch_size) - scratch_size = info->interfaces[i].scratch_size; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(scratch_size <= 2 * 1024 * 1024); + *per_thread_space = (info->per_thread_scratch_size > 1024) ? + (info->per_thread_scratch_size - 1) / 1024 : 0; + + return 1024 * (1 + *per_thread_space); +} - /* next power of two, starting from 1KB */ - return (scratch_size > 1024) ? 
- (util_last_bit(scratch_size - 1) - 10): 0; - } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - assert(scratch_size <= 2 * 1024 * 1024); +static uint32_t +compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) +{ + ILO_DEV_ASSERT(dev, 7.5, 8); - /* next power of two, starting from 2KB */ - return (scratch_size > 2048) ? - (util_last_bit(scratch_size - 1) - 11): 0; - } else { - assert(scratch_size <= 12 * 1024); + /* + * From the Haswell PRM, volume 2b, page 407: + * + * "(Per Thread Scratch Space) + * [0,10] Indicating [2k bytes, 2 Mbytes]" + * + * "Note: The scratch space should be declared as 2x the desired + * scratch space. The stack will start at the half-way point instead + * of the end. The upper half of scratch space will not be accessed + * and so does not have to be allocated in memory." + * + * From the Broadwell PRM, volume 2a, page 450: + * + * "(Per Thread Scratch Space) + * [0,11] indicating [1k bytes, 2 Mbytes]" + */ + assert(info->per_thread_scratch_size <= + ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024); - return (scratch_size > 1024) ? - (scratch_size - 1) / 1024 : 0; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } + + /* next power of two, starting from 1KB */ + *per_thread_space = (info->per_thread_scratch_size > 1024) ? 
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + + return 1 << (10 + *per_thread_space); } static bool @@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, const struct ilo_state_compute_info *info) { struct compute_urb_configuration urb; - uint8_t scratch_space; + uint32_t per_thread_size; + uint8_t per_thread_space; uint32_t dw1, dw2, dw4; @@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, !compute_validate_gen6(dev, info, &urb)) return false; - scratch_space = compute_get_gen6_scratch_space(dev, info); + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + per_thread_size = compute_get_gen75_per_thread_scratch_size(dev, + info, &per_thread_space); + } else { + per_thread_size = compute_get_gen6_per_thread_scratch_size(dev, + info, &per_thread_space); + } + + dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; - dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | GEN6_VFE_DW2_RESET_GATEWAY_TIMER | @@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, compute->vfe[1] = dw2; compute->vfe[2] = dw4; + compute->scratch_size = per_thread_size * dev->thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h index 346f7b617f4..bd56bba4369 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.h +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h @@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info { /* usually 0 unless there are multiple interfaces */ uint32_t kernel_offset; - uint32_t scratch_size; - uint8_t sampler_count; uint8_t surface_count; @@ -65,6 +63,8 @@ struct ilo_state_compute_info { const struct ilo_state_compute_interface_info *interfaces; uint8_t interface_count; + uint32_t 
per_thread_scratch_size; + uint32_t cv_urb_alloc_size; uint32_t curbe_alloc_size; }; @@ -74,6 +74,8 @@ struct ilo_state_compute { uint32_t (*idrt)[6]; uint8_t idrt_count; + + uint32_t scratch_size; }; static inline size_t @@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute, const struct ilo_dev *dev, const struct ilo_state_compute_info *info); +static inline uint32_t +ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute) +{ + return compute->scratch_size; +} + #endif /* ILO_STATE_COMPUTE_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c index f67326c7f10..aec4fd6d8a6 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c @@ -37,7 +37,9 @@ enum vertex_stage { struct vertex_ff { uint8_t grf_start; - uint8_t scratch_space; + + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t surface_count; @@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, * others. */ const uint8_t max_grf_start = (stage == STAGE_GS) ? 
16 : 32; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 134: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1K Bytes, 2M Bytes]" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, assert(!kernel->offset); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info *kernel, const struct ilo_state_shader_resource_info *resource, const struct ilo_state_shader_urb_info *urb, + uint32_t per_thread_scratch_size, struct vertex_ff *ff) { ILO_DEV_ASSERT(dev, 6, 8); + memset(ff, 0, sizeof(*ff)); + if (!vertex_validate_gen6_kernel(dev, stage, kernel) || !vertex_validate_gen6_urb(dev, stage, urb)) return false; ff->grf_start = kernel->grf_start; - /* next power of two, starting from 1KB */ - ff->scratch_space = (kernel->scratch_size > 1024) ? - (util_last_bit(kernel->scratch_size - 1) - 10): 0; + + if (per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 134: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1K Bytes, 2M Bytes]" + */ + assert(per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ? + (util_last_bit(per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; @@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = vs_get_gen6_thread_count(dev, info); @@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | @@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; + vs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = hs_get_gen7_thread_count(dev, info); @@ -282,19 +294,22 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT; + else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; else dw1 |= thread_count << 
GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; - dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT; - if (info->dispatch_enable) dw2 |= GEN7_HS_DW2_HS_ENABLE; if (info->stats_enable) dw2 |= GEN7_HS_DW2_STATISTICS; - dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES | ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT | @@ -310,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, hs->hs[2] = dw4; hs->hs[3] = dw5; + hs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -373,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = ds_get_gen7_thread_count(dev, info); @@ -385,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT | @@ -412,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(8)) ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT; + ds->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -425,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, - &info->resource, &info->urb, ff)) + if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, ff)) 
return false; /* @@ -510,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | @@ -550,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, gs->gs[3] = dw5; gs->gs[4] = dw6; + gs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -588,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT | 0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT | @@ -618,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT; + gs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h index 44690c5b0bb..35651090d66 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.h +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info { uint8_t grf_start; uint8_t pcb_attr_count; - - uint32_t scratch_size; }; /** @@ -77,6 +75,7 @@ struct ilo_state_vs_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -86,6 +85,7 @@ struct ilo_state_hs_info { struct 
ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -95,6 +95,7 @@ struct ilo_state_ds_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -119,6 +120,7 @@ struct ilo_state_gs_info { struct ilo_state_gs_sol_info sol; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -158,6 +160,8 @@ struct ilo_state_ps_info { struct ilo_state_ps_io_info io; struct ilo_state_ps_params_info params; + uint32_t per_thread_scratch_size; + /* bitmask of GEN6_PS_DISPATCH_x */ uint8_t valid_kernels; bool per_sample_dispatch; @@ -173,23 +177,28 @@ struct ilo_state_ps_info { struct ilo_state_vs { uint32_t vs[5]; + uint32_t scratch_size; }; struct ilo_state_hs { uint32_t hs[4]; + uint32_t scratch_size; }; struct ilo_state_ds { uint32_t te[3]; uint32_t ds[5]; + uint32_t scratch_size; }; struct ilo_state_gs { uint32_t gs[5]; + uint32_t scratch_size; }; struct ilo_state_ps { uint32_t ps[8]; + uint32_t scratch_size; struct ilo_state_ps_dispatch_conds { bool ps_valid; @@ -211,6 +220,12 @@ bool ilo_state_vs_init_disabled(struct ilo_state_vs *vs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs) +{ + return vs->scratch_size; +} + bool ilo_state_hs_init(struct ilo_state_hs *hs, const struct ilo_dev *dev, @@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs) +{ + return hs->scratch_size; +} + bool ilo_state_ds_init(struct ilo_state_ds *ds, const struct ilo_dev *dev, @@ -230,6 +251,12 @@ bool ilo_state_ds_init_disabled(struct ilo_state_ds *ds, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_ds_get_scratch_size(const struct 
ilo_state_ds *ds) +{ + return ds->scratch_size; +} + bool ilo_state_gs_init(struct ilo_state_gs *gs, const struct ilo_dev *dev, @@ -239,6 +266,12 @@ bool ilo_state_gs_init_disabled(struct ilo_state_gs *gs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs) +{ + return gs->scratch_size; +} + bool ilo_state_ps_init(struct ilo_state_ps *ps, const struct ilo_dev *dev, @@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_params_info *params); +static inline uint32_t +ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps) +{ + return ps->scratch_size; +} + #endif /* ILO_STATE_SHADER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c index ceeb68a460e..5c3ca1ebe37 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c @@ -34,7 +34,8 @@ struct pixel_ff { uint32_t kernel_offsets[3]; uint8_t grf_starts[3]; bool pcb_enable; - uint8_t scratch_space; + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t surface_count; @@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, { /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ const uint8_t max_grf_start = 128; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 271: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, assert(kernel->offset % 64 == 0); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info 
*kernel_8 = &info->kernel_8; const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; - uint32_t scratch_size; ILO_DEV_ASSERT(dev, 6, 8); @@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && kernel_32->pcb_attr_count)); - scratch_size = 0; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && - scratch_size < kernel_8->scratch_size) - scratch_size = kernel_8->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && - scratch_size < kernel_16->scratch_size) - scratch_size = kernel_16->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && - scratch_size < kernel_32->scratch_size) - scratch_size = kernel_32->scratch_size; - - /* next power of two, starting from 1KB */ - ff->scratch_space = (scratch_size > 1024) ? - (util_last_bit(scratch_size - 1) - 10): 0; - /* GPU hangs on Haswell if none of the dispatch mode bits is set */ if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) ff->dispatch_modes |= GEN6_PS_DISPATCH_8; @@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev, if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) return false; + if (info->per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 271: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" + */ + assert(info->per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ? + (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } + ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; ff->surface_count = resource->surface_count; @@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | @@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; @@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw3 |= GEN6_THREADDISP_FP_MODE_ALT; - dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | @@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps, ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); } + ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count; /* save conditions */ ps->conds = ff.conds; diff --git a/src/gallium/drivers/ilo/ilo_blit.h b/src/gallium/drivers/ilo/ilo_blit.h index da0bfe9c4c9..bad4dab8404 100644 --- a/src/gallium/drivers/ilo/ilo_blit.h +++ b/src/gallium/drivers/ilo/ilo_blit.h @@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo, * As it is only used to resolve HiZ right now, return early when there is * no HiZ. 
*/ - if (!ilo_image_can_enable_aux(&tex->image, level)) + if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ || + !ilo_image_can_enable_aux(&tex->image, level)) return; - if (ilo_image_can_enable_aux(&tex->image, level)) { + if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ && + ilo_image_can_enable_aux(&tex->image, level)) { ilo_blit_resolve_slices_for_hiz(ilo, res, level, first_slice, num_slices, resolve_flags); } diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c index 433348d9326..69f36ae5df6 100644 --- a/src/gallium/drivers/ilo/ilo_draw.c +++ b/src/gallium/drivers/ilo/ilo_draw.c @@ -547,6 +547,7 @@ static void ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct ilo_context *ilo = ilo_context(pipe); + int vs_scratch_size, gs_scratch_size, fs_scratch_size; if (ilo_debug & ILO_DEBUG_DRAW) { if (info->indexed) { @@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) ilo_finalize_3d_states(ilo, info); + /* upload kernels */ ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder); + /* prepare scratch spaces */ + ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache, + &vs_scratch_size, &gs_scratch_size, &fs_scratch_size); + ilo_render_prepare_scratch_spaces(ilo->render, + vs_scratch_size, gs_scratch_size, fs_scratch_size); + ilo_blit_resolve_framebuffer(ilo); /* If draw_vbo ever fails, return immediately. 
*/ diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 21f75de11a0..8bc04df4fab 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder) void ilo_render_destroy(struct ilo_render *render) { + intel_bo_unref(render->vs_scratch.bo); + intel_bo_unref(render->gs_scratch.bo); + intel_bo_unref(render->fs_scratch.bo); + intel_bo_unref(render->workaround_bo); FREE(render); } +static bool +resize_scratch_space(struct ilo_render *render, + struct ilo_render_scratch_space *scratch, + const char *name, int new_size) +{ + struct intel_bo *bo; + + if (scratch->size >= new_size) + return true; + + bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false); + if (!bo) + return false; + + intel_bo_unref(scratch->bo); + scratch->bo = bo; + scratch->size = new_size; + + return true; +} + +bool +ilo_render_prepare_scratch_spaces(struct ilo_render *render, + int vs_scratch_size, + int gs_scratch_size, + int fs_scratch_size) +{ + return (resize_scratch_space(render, &render->vs_scratch, + "vs scratch", vs_scratch_size) && + resize_scratch_space(render, &render->gs_scratch, + "gs scratch", gs_scratch_size) && + resize_scratch_space(render, &render->fs_scratch, + "fs scratch", fs_scratch_size)); +} + void ilo_render_get_sample_position(const struct ilo_render *render, unsigned sample_count, diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h index 098af73ec9b..31fd1e6f859 100644 --- a/src/gallium/drivers/ilo/ilo_render.h +++ b/src/gallium/drivers/ilo/ilo_render.h @@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder); void ilo_render_destroy(struct ilo_render *render); +bool +ilo_render_prepare_scratch_spaces(struct ilo_render *render, + int vs_scratch_size, + int gs_scratch_size, + int fs_scratch_size); + void ilo_render_get_sample_position(const struct ilo_render *render, unsigned 
sample_count, diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 6b133750043..f227d6bf4da 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -51,6 +51,11 @@ struct ilo_render { struct intel_bo *workaround_bo; + struct ilo_render_scratch_space { + struct intel_bo *bo; + int size; + } vs_scratch, gs_scratch, fs_scratch; + struct ilo_state_sample_pattern sample_pattern; bool hw_ctx_changed; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index c1f759f3043..910e6c0fb7a 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r, gen6_wa_pre_3dstate_vs_toggle(r); if (ilo_dev_gen(r->dev) == ILO_GEN(6) && - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) - gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset); - else - gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { + gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, + kernel_offset, r->vs_scratch.bo); + } else { + gen6_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } } } @@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r, cso = ilo_shader_get_kernel_cso(vec->gs); kernel_offset = ilo_shader_get_kernel_offset(vec->gs); - gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->gs, + kernel_offset, r->gs_scratch.bo); } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { const int verts_per_prim = @@ -524,9 +528,10 @@ gen6_draw_gs(struct ilo_render *r, kernel_offset = ilo_shader_get_kernel_offset(vec->vs) + ilo_shader_get_kernel_param(vec->vs, param); - gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, + 
kernel_offset, r->gs_scratch.bo); } else { - gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0); + gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL); } } } @@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r, gen6_wa_pre_3dstate_wm_max_threads(r); gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, - &cso->ps, kernel_offset); + &cso->ps, kernel_offset, r->fs_scratch.bo); } } @@ -817,10 +822,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_wa_post_3dstate_constant_vs(r); gen6_wa_pre_3dstate_vs_toggle(r); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe); @@ -833,7 +838,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 6623a8bcb43..330ba6c88d6 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r, const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs); const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); - if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); - else - gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) { + gen8_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } else { + gen6_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } } } @@ 
-338,9 +341,9 @@ gen7_draw_hs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_HS(r->builder, hs, kernel_offset); + gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); else - gen7_3DSTATE_HS(r->builder, hs, kernel_offset); + gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ @@ -373,9 +376,9 @@ gen7_draw_ds(struct ilo_render *r, gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_DS(r->builder, ds, kernel_offset); + gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); else - gen7_3DSTATE_DS(r->builder, ds, kernel_offset); + gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ @@ -397,9 +400,9 @@ gen7_draw_gs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_GS(r->builder, gs, kernel_offset); + gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); else - gen7_3DSTATE_GS(r->builder, gs, kernel_offset); + gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r, if (r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); + gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -678,18 +681,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_HS(r->builder, &blitter->hs, 0); + gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL); gen7_3DSTATE_TE(r->builder, &blitter->ds); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); - 
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0); + gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol); @@ -711,7 +714,7 @@ gen7_rectlist_wm(struct ilo_render *r, gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &blitter->ps, 0); + gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 65494b4058a..efe0e0d501b 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_PS */ if (DIRTY(FS) || r->instruction_bo_changed) - gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); + gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo); /* 3DSTATE_PS_EXTRA */ if (DIRTY(FS)) diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index e1a7dc56685..888f7aa6782 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 73b625e9de4..c61716dc791 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -37,6 +37,10 @@ struct ilo_shader_cache { struct list_head shaders; struct list_head changed; + + int max_vs_scratch_size; + int max_gs_scratch_size; + int max_fs_scratch_size; }; /** @@ -121,6 +125,8 @@ 
ilo_shader_cache_upload(struct ilo_shader_cache *shc, struct ilo_shader *sh; LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) { + int scratch_size, *cur_max; + if (sh->uploaded) continue; @@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc, sh->kernel_size, sh->kernel); sh->uploaded = true; + + switch (shader->info.type) { + case PIPE_SHADER_VERTEX: + scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs); + cur_max = &shc->max_vs_scratch_size; + break; + case PIPE_SHADER_GEOMETRY: + scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs); + cur_max = &shc->max_gs_scratch_size; + break; + case PIPE_SHADER_FRAGMENT: + scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps); + cur_max = &shc->max_fs_scratch_size; + break; + default: + assert(!"unknown shader type"); + scratch_size = 0; + cur_max = &shc->max_vs_scratch_size; + break; + } + + if (*cur_max < scratch_size) + *cur_max = scratch_size; } list_del(&shader->list); @@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc) LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) sh->uploaded = false; } + + shc->max_vs_scratch_size = 0; + shc->max_gs_scratch_size = 0; + shc->max_fs_scratch_size = 0; +} + +void +ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc, + int *vs_scratch_size, + int *gs_scratch_size, + int *fs_scratch_size) +{ + *vs_scratch_size = shc->max_vs_scratch_size; + *gs_scratch_size = shc->max_gs_scratch_size; + *fs_scratch_size = shc->max_fs_scratch_size; } /** @@ -578,7 +622,6 @@ init_shader_kernel(const struct ilo_shader *kernel, kern->grf_start = kernel->in.start_grf; kern->pcb_attr_count = (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16; - kern->scratch_size = 0; } static void @@ -602,6 +645,7 @@ init_vs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = 
kernel->per_thread_scratch_size; info.dispatch_enable = true; info.stats_enable = true; @@ -640,6 +684,7 @@ init_gs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = kernel->per_thread_scratch_size; info.dispatch_enable = true; info.stats_enable = true; @@ -664,6 +709,7 @@ init_ps(struct ilo_shader *kernel, init_shader_kernel(kernel, state, &info.kernel_8); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = kernel->per_thread_scratch_size; info.io.has_rt_write = true; info.io.posoffset = GEN6_POSOFFSET_NONE; info.io.attr_count = kernel->in.count; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 01de54146b1..10dcf739430 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc, void ilo_shader_cache_invalidate(struct ilo_shader_cache *shc); +void +ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc, + int *vs_scratch_size, + int *gs_scratch_size, + int *fs_scratch_size); + struct ilo_shader_state * ilo_shader_create_vs(const struct ilo_dev *dev, const struct pipe_shader_state *state, diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 01c86675202..1f0cda174e8 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -139,6 +139,7 @@ struct ilo_shader { void *kernel; int kernel_size; + int per_thread_scratch_size; struct ilo_kernel_routing routing; struct ilo_state_ps_params_info ps_params; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index e2ed267da78..d1c50aefc84 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4c8167a9e7d..1778b13f9dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (res->target == PIPE_TEXTURE_1D_ARRAY || + res->target == PIPE_TEXTURE_2D_ARRAY || + res->target == PIPE_TEXTURE_CUBE || + res->target == PIPE_TEXTURE_CUBE_ARRAY) { /* * For array textures, we don't have first_layer, instead * adjust last_layer (stored as depth) plus the mip level offsets diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index b205f02fdba..1e055878f7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -275,10 +275,10 @@ prepare_shader_sampling( row_stride[j] = lp_tex->row_stride[j]; img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - 
view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index af46342fdf2..7862ac8f217 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -200,7 +200,8 @@ llvmpipe_can_create_resource(struct pipe_screen *screen, static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_resource *lpr) + struct llvmpipe_resource *lpr, + const void *map_front_private) { struct sw_winsys *winsys = screen->winsys; @@ -215,12 +216,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, lpr->base.format, width, height, 64, + map_front_private, &lpr->row_stride[0] ); if (lpr->dt == NULL) return FALSE; - { + if (!map_front_private) { void *map = winsys->displaytarget_map(winsys, lpr->dt, PIPE_TRANSFER_WRITE); @@ -235,8 +237,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, static struct pipe_resource * -llvmpipe_resource_create(struct pipe_screen *_screen, - const struct pipe_resource *templat) +llvmpipe_resource_create_front(struct pipe_screen *_screen, + const struct pipe_resource *templat, + const void *map_front_private) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); @@ -254,7 +257,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { /* displayable surface */ - if (!llvmpipe_displaytarget_layout(screen, lpr)) + if (!llvmpipe_displaytarget_layout(screen, lpr, map_front_private)) goto fail; } else { @@ -300,7 +303,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen, FREE(lpr); return NULL; } - +static struct pipe_resource * +llvmpipe_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + return llvmpipe_resource_create_front(_screen, templat, 
NULL); +} static void llvmpipe_resource_destroy(struct pipe_screen *pscreen, @@ -797,6 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) #endif screen->resource_create = llvmpipe_resource_create; + screen->resource_create_front = llvmpipe_resource_create_front; screen->resource_destroy = llvmpipe_resource_destroy; screen->resource_from_handle = llvmpipe_resource_from_handle; screen->resource_get_handle = llvmpipe_resource_get_handle; diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index c18e9f5b435..83f81135590 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -73,6 +73,9 @@ NV50_C_SOURCES := \ nv50/nv50_program.h \ nv50/nv50_push.c \ nv50/nv50_query.c \ + nv50/nv50_query.h \ + nv50/nv50_query_hw.c \ + nv50/nv50_query_hw.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index cce60550ae5..6ad9dd31681 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.gp.instanceCount = 1; info->prop.gp.maxVertices = 1; } - info->io.clipDistance = 0xff; info->io.pointSize = 0xff; info->io.instanceId = 0xff; info->io.vertexId = 0xff; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h index a610c773f55..0d544581697 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h @@ -73,8 +73,8 @@ public: Instruction *mkCvt(operation, DataType, Value *, DataType, Value *); CmpInstruction *mkCmp(operation, CondCode, DataType, - Value *, - DataType, Value *, Value *, Value * = NULL); + Value *, + DataType, Value *, Value *, 
Value * = NULL); TexInstruction *mkTex(operation, TexTarget, uint16_t tic, uint16_t tsc, const std::vector<Value *> &def, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 2b9edcf9172..c0cab3299b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -99,6 +99,7 @@ struct nv50_ir_prog_info uint8_t sourceRep; /* NV50_PROGRAM_IR */ const void *source; void *relocData; + void *interpData; struct nv50_ir_prog_symbol *syms; uint16_t numSyms; } bin; @@ -143,6 +144,7 @@ struct nv50_ir_prog_info bool earlyFragTests; bool separateFragData; bool usesDiscard; + bool sampleInterp; /* perform sample interp on all fp inputs */ } fp; struct { uint32_t inputOffset; /* base address for user args */ @@ -154,9 +156,8 @@ struct nv50_ir_prog_info uint8_t numBarriers; struct { - uint8_t clipDistance; /* index of first clip distance output */ - uint8_t clipDistanceMask; /* mask of clip distances defined */ - uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ + uint8_t clipDistances; /* number of clip distance outputs */ + uint8_t cullDistances; /* number of cull distance outputs */ int8_t genUserClip; /* request user clip planes for ClipVertex */ uint16_t ucpBase; /* base address for UCPs */ uint8_t ucpCBSlot; /* constant buffer index of UCP data */ @@ -168,7 +169,6 @@ struct nv50_ir_prog_info int8_t viewportId; /* output index of ViewportIndex */ uint8_t fragDepth; /* output index of FragDepth */ uint8_t sampleMask; /* output index of SampleMask */ - bool sampleInterp; /* perform sample interp on all fp inputs */ uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ @@ -198,6 +198,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, uint32_t libPos, uint32_t dataPos); +extern void 
+nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_per_sample, bool flatshade); + /* obtain code that will be shared among programs */ extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 8f1542959c9..d712c9c300a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 19); + code[loc + 1] |= (ipa & 0x3) << 21; + code[loc + 1] |= (ipa & 0xc) << (19 - 2); + code[loc + 0] &= ~(0xff << 23); + code[loc + 0] |= reg << 23; +} + void CodeEmitterGK110::emitINTERP(const Instruction *i) { @@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->saturate) code[1] |= 1 << 18; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0xff << 23; + addInterp(i->ipa, 0xff, interpApply); + } srcId(i->src(0).getIndirect(0), 10); emitInterpMode(i); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 6e22788341f..a327d572470 100644 --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P() emitGPR (0x00, insn->def(0)); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 0x14); + code[loc + 1] |= (ipa & 0x3) << 0x16; + code[loc + 1] |= (ipa & 0xc) << (0x14 - 2); + code[loc + 0] &= ~(0xff << 0x14); + code[loc + 0] |= reg << 0x14; +} + void CodeEmitterGM107::emitIPA() { @@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA() emitGPR(0x14, insn->src(1)); if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(2)); + addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); } else { if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(1)); emitGPR(0x14); + addInterp(insn->ipa, 0xff, interpApply); } if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 90147668c91..9f1e4b803d5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -372,7 +372,7 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) mode |= 3 << (s * 2); break; default: - ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); + ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); assert(0); break; } @@ -876,6 +876,30 @@ 
CodeEmitterNV50::emitPFETCH(const Instruction *i) emitFlagsRd(i); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int encSize = entry->reg; + int loc = entry->loc; + + if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + if (force_persample_interp) { + if (encSize == 8) + code[loc + 1] |= 1 << 16; + else + code[loc + 0] |= 1 << 24; + } else { + if (encSize == 8) + code[loc + 1] &= ~(1 << 16); + else + code[loc + 0] &= ~(1 << 24); + } + } +} + void CodeEmitterNV50::emitINTERP(const Instruction *i) { @@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i) code[0] |= 1; emitFlagsRd(i); } + + addInterp(i->ipa, i->encSize, interpApply); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 6bf5219d346..fd103146c72 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i) } } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0x3f; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 0] &= ~(0xf << 6); + code[loc + 0] |= ipa << 6; + code[loc + 0] &= ~(0x3f << 26); + code[loc + 0] |= reg << 26; +} + void CodeEmitterNVC0::emitINTERP(const Instruction *i) { @@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i) if 
(i->saturate) code[0] |= 1 << 5; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 26); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0x3f << 26; + addInterp(i->ipa, 0x3f, interpApply); + } srcId(i->src(0).getIndirect(0), 20); } else { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index c8efaf5947a..6a7cb4224f4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -910,7 +910,7 @@ bool Source::scanSource() info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; if (info->io.genUserClip > 0) { - info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; + info->io.clipDistances = info->io.genUserClip; const unsigned int nOut = (info->io.genUserClip + 3) / 4; @@ -919,7 +919,7 @@ bool Source::scanSource() info->out[i].id = i; info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; info->out[i].si = n; - info->out[i].mask = info->io.clipDistanceMask >> (n * 4); + info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); } } @@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop) else info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ break; + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->io.clipDistances = prop->u[0].Data; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->io.cullDistances = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -1054,7 +1060,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Interp.Location || info->io.sampleInterp) + if (decl->Interp.Location) info->in[i].centroid = 1; } @@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) clipVertexOutput = i; break; case TGSI_SEMANTIC_CLIPDIST: - 
info->io.clipDistanceMask |= - decl->Declaration.UsageMask << (si * 4); info->io.genUserClip = -1; break; case TGSI_SEMANTIC_SAMPLEMASK: @@ -1119,6 +1123,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->prop.fp.sampleInterp = 1; + break; default: break; } @@ -1338,6 +1346,8 @@ private: void handleINTERP(Value *dst0[4]); + uint8_t translateInterpMode(const struct nv50_ir_varying *var, + operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1451,8 +1461,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -static inline uint8_t -translateInterpMode(const struct nv50_ir_varying *var, operation& op) +uint8_t +Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) { uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; @@ -1468,7 +1478,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op) op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) ? OP_PINTERP : OP_LINTERP; - if (var->centroid) + if (var->centroid || info->prop.fp.sampleInterp) mode |= NV50_IR_INTERP_CENTROID; return mode; @@ -1628,7 +1638,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) // don't load masked inputs, won't be assigned a slot if (!ptr && !(info->in[idx].mask & (1 << swz))) return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 
1.0f : 0.0f); - if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) + if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); return interpolate(src, c, shiftAddress(ptr)); } else diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index fe530c76b62..afc8ff1374f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -166,7 +166,7 @@ void Target::destroy(Target *targ) delete targ; } -CodeEmitter::CodeEmitter(const Target *target) : targ(target) +CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL) { } @@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info) } } info->bin.relocData = emit->getRelocInfo(); + info->bin.interpData = emit->getInterpInfo(); emitSymbolTable(info); @@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, return true; } +bool +CodeEmitter::addInterp(int ipa, int reg, InterpApply apply) +{ + unsigned int n = interpInfo ? interpInfo->count : 0; + + if (!(n % RELOC_ALLOC_INCREMENT)) { + size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry); + interpInfo = reinterpret_cast<InterpInfo *>( + REALLOC(interpInfo, n ? 
size : 0, + size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry))); + if (!interpInfo) + return false; + if (n == 0) + memset(interpInfo, 0, sizeof(InterpInfo)); + } + ++interpInfo->count; + + interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2); + interpInfo->apply = apply; + + return true; +} + void RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const { @@ -472,6 +496,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code, } void +nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>( + interpData); + + // force_persample_interp: all non-flat -> per-sample + // flatshade: all color -> flat + for (unsigned i = 0; i < info->count; ++i) + info->apply(&info->entry[i], code, force_persample_interp, flatshade); +} + +void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 591916eb412..4e33997e1c1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -58,6 +58,23 @@ struct RelocInfo RelocEntry entry[0]; }; +struct InterpEntry +{ + InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {} + uint32_t ipa:4; // SC mode used to identify colors + uint32_t reg:8; // The reg used for perspective division + uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders +}; + +typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool); + +struct InterpInfo +{ + uint32_t count; + InterpApply apply; + InterpEntry entry[0]; +}; + class CodeEmitter { public: @@ -78,6 +95,9 @@ public: inline void *getRelocInfo() const { return relocInfo; } + bool addInterp(int ipa, int reg, InterpApply apply); + inline void *getInterpInfo() const { return interpInfo; } + virtual void 
prepareEmission(Program *); virtual void prepareEmission(Function *); virtual void prepareEmission(BasicBlock *); @@ -92,6 +112,7 @@ protected: uint32_t codeSizeLimit; RelocInfo *relocInfo; + InterpInfo *interpInfo; }; diff --git a/src/gallium/drivers/nouveau/nouveau_heap.c b/src/gallium/drivers/nouveau/nouveau_heap.c index f4aa5081dfe..3d415a5f30e 100644 --- a/src/gallium/drivers/nouveau/nouveau_heap.c +++ b/src/gallium/drivers/nouveau/nouveau_heap.c @@ -29,95 +29,95 @@ int nouveau_heap_init(struct nouveau_heap **heap, unsigned start, unsigned size) { - struct nouveau_heap *r; + struct nouveau_heap *r; - r = calloc(1, sizeof(struct nouveau_heap)); - if (!r) - return 1; + r = calloc(1, sizeof(struct nouveau_heap)); + if (!r) + return 1; - r->start = start; - r->size = size; - *heap = r; - return 0; + r->start = start; + r->size = size; + *heap = r; + return 0; } void nouveau_heap_destroy(struct nouveau_heap **heap) { - if (!*heap) - return; - free(*heap); - *heap = NULL; + if (!*heap) + return; + free(*heap); + *heap = NULL; } int nouveau_heap_alloc(struct nouveau_heap *heap, unsigned size, void *priv, struct nouveau_heap **res) { - struct nouveau_heap *r; + struct nouveau_heap *r; - if (!heap || !size || !res || *res) - return 1; + if (!heap || !size || !res || *res) + return 1; - while (heap) { - if (!heap->in_use && heap->size >= size) { - r = calloc(1, sizeof(struct nouveau_heap)); - if (!r) - return 1; + while (heap) { + if (!heap->in_use && heap->size >= size) { + r = calloc(1, sizeof(struct nouveau_heap)); + if (!r) + return 1; - r->start = (heap->start + heap->size) - size; - r->size = size; - r->in_use = 1; - r->priv = priv; + r->start = (heap->start + heap->size) - size; + r->size = size; + r->in_use = 1; + r->priv = priv; - heap->size -= size; + heap->size -= size; - r->next = heap->next; - if (heap->next) - heap->next->prev = r; - r->prev = heap; - heap->next = r; + r->next = heap->next; + if (heap->next) + heap->next->prev = r; + r->prev = heap; + 
heap->next = r; - *res = r; - return 0; - } + *res = r; + return 0; + } - heap = heap->next; - } + heap = heap->next; + } - return 1; + return 1; } void nouveau_heap_free(struct nouveau_heap **res) { - struct nouveau_heap *r; - - if (!res || !*res) - return; - r = *res; - *res = NULL; - - r->in_use = 0; - - if (r->next && !r->next->in_use) { - struct nouveau_heap *new = r->next; - - new->prev = r->prev; - if (r->prev) - r->prev->next = new; - new->size += r->size; - new->start = r->start; - - free(r); - r = new; - } - - if (r->prev && !r->prev->in_use) { - r->prev->next = r->next; - if (r->next) - r->next->prev = r->prev; - r->prev->size += r->size; - free(r); - } + struct nouveau_heap *r; + + if (!res || !*res) + return; + r = *res; + *res = NULL; + + r->in_use = 0; + + if (r->next && !r->next->in_use) { + struct nouveau_heap *new = r->next; + + new->prev = r->prev; + if (r->prev) + r->prev->next = new; + new->size += r->size; + new->start = r->start; + + free(r); + r = new; + } + + if (r->prev && !r->prev->in_use) { + r->prev->next = r->next; + if (r->next) + r->next->prev = r->prev; + r->prev->size += r->size; + free(r); + } } diff --git a/src/gallium/drivers/nouveau/nouveau_heap.h b/src/gallium/drivers/nouveau/nouveau_heap.h index a3d64a65623..99f610ed4c8 100644 --- a/src/gallium/drivers/nouveau/nouveau_heap.h +++ b/src/gallium/drivers/nouveau/nouveau_heap.h @@ -44,15 +44,15 @@ * full size of the heap. 
*/ struct nouveau_heap { - struct nouveau_heap *prev; - struct nouveau_heap *next; + struct nouveau_heap *prev; + struct nouveau_heap *next; - void *priv; + void *priv; - unsigned start; - unsigned size; + unsigned start; + unsigned size; - int in_use; + int in_use; }; int diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index b2290e7e784..47603b0b7fd 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -30,211 +30,211 @@ int nouveau_mesa_debug = 0; static const char * nouveau_screen_get_name(struct pipe_screen *pscreen) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - static char buffer[128]; + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + static char buffer[128]; - util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); - return buffer; + util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; } static const char * nouveau_screen_get_vendor(struct pipe_screen *pscreen) { - return "nouveau"; + return "nouveau"; } static const char * nouveau_screen_get_device_vendor(struct pipe_screen *pscreen) { - return "NVIDIA"; + return "NVIDIA"; } static uint64_t nouveau_screen_get_timestamp(struct pipe_screen *pscreen) { - int64_t cpu_time = os_time_get() * 1000; + int64_t cpu_time = os_time_get() * 1000; - /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */ + /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */ - return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta; + return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta; } static void nouveau_screen_fence_ref(struct pipe_screen *pscreen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *pfence) + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) { - nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr); + nouveau_fence_ref(nouveau_fence(pfence), 
(struct nouveau_fence **)ptr); } static boolean nouveau_screen_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *pfence, + struct pipe_fence_handle *pfence, uint64_t timeout) { - if (!timeout) - return nouveau_fence_signalled(nouveau_fence(pfence)); + if (!timeout) + return nouveau_fence_signalled(nouveau_fence(pfence)); - return nouveau_fence_wait(nouveau_fence(pfence)); + return nouveau_fence_wait(nouveau_fence(pfence)); } struct nouveau_bo * nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, - struct winsys_handle *whandle, - unsigned *out_stride) + struct winsys_handle *whandle, + unsigned *out_stride) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nouveau_bo *bo = 0; - int ret; - - if (whandle->type != DRM_API_HANDLE_TYPE_SHARED && - whandle->type != DRM_API_HANDLE_TYPE_FD) { - debug_printf("%s: attempt to import unsupported handle type %d\n", - __FUNCTION__, whandle->type); - return NULL; - } - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) - ret = nouveau_bo_name_ref(dev, whandle->handle, &bo); - else - ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo); - - if (ret) { - debug_printf("%s: ref name 0x%08x failed with %d\n", - __FUNCTION__, whandle->handle, ret); - return NULL; - } - - *out_stride = whandle->stride; - return bo; + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nouveau_bo *bo = 0; + int ret; + + if (whandle->type != DRM_API_HANDLE_TYPE_SHARED && + whandle->type != DRM_API_HANDLE_TYPE_FD) { + debug_printf("%s: attempt to import unsupported handle type %d\n", + __FUNCTION__, whandle->type); + return NULL; + } + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) + ret = nouveau_bo_name_ref(dev, whandle->handle, &bo); + else + ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo); + + if (ret) { + debug_printf("%s: ref name 0x%08x failed with %d\n", + __FUNCTION__, whandle->handle, ret); + return NULL; + } + + *out_stride = whandle->stride; + return 
bo; } bool nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, - struct nouveau_bo *bo, - unsigned stride, - struct winsys_handle *whandle) + struct nouveau_bo *bo, + unsigned stride, + struct winsys_handle *whandle) { - whandle->stride = stride; - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { - return nouveau_bo_name_get(bo, &whandle->handle) == 0; - } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { - whandle->handle = bo->handle; - return true; - } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { - return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0; - } else { - return false; - } + whandle->stride = stride; + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + return nouveau_bo_name_get(bo, &whandle->handle) == 0; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + return true; + } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { + return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0; + } else { + return false; + } } int nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) { - struct pipe_screen *pscreen = &screen->base; - struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; - struct nvc0_fifo nvc0_data = { }; - uint64_t time; - int size, ret; - void *data; - union nouveau_bo_config mm_config; - - char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); - if (nv_dbg) - nouveau_mesa_debug = atoi(nv_dbg); - - /* - * this is initialized to 1 in nouveau_drm_screen_create after screen - * is fully constructed and added to the global screen list. 
- */ - screen->refcount = -1; - - if (dev->chipset < 0xc0) { - data = &nv04_data; - size = sizeof(nv04_data); - } else { - data = &nvc0_data; - size = sizeof(nvc0_data); - } - - /* - * Set default VRAM domain if not overridden - */ - if (!screen->vram_domain) { - if (dev->vram_size > 0) - screen->vram_domain = NOUVEAU_BO_VRAM; - else - screen->vram_domain = NOUVEAU_BO_GART; - } - - ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, - data, size, &screen->channel); - if (ret) - return ret; - screen->device = dev; - - ret = nouveau_client_new(screen->device, &screen->client); - if (ret) - return ret; - ret = nouveau_pushbuf_new(screen->client, screen->channel, - 4, 512 * 1024, 1, - &screen->pushbuf); - if (ret) - return ret; - - /* getting CPU time first appears to be more accurate */ - screen->cpu_gpu_time_delta = os_time_get(); - - ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time); - if (!ret) - screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000; - - pscreen->get_name = nouveau_screen_get_name; - pscreen->get_vendor = nouveau_screen_get_vendor; - pscreen->get_device_vendor = nouveau_screen_get_device_vendor; - - pscreen->get_timestamp = nouveau_screen_get_timestamp; - - pscreen->fence_reference = nouveau_screen_fence_ref; - pscreen->fence_finish = nouveau_screen_fence_finish; - - util_format_s3tc_init(); - - screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */ - screen->vidmem_bindings = - PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | - PIPE_BIND_CURSOR | - PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | - PIPE_BIND_COMPUTE_RESOURCE | - PIPE_BIND_GLOBAL; - screen->sysmem_bindings = - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER; - - memset(&mm_config, 0, sizeof(mm_config)); - - screen->mm_GART = nouveau_mm_create(dev, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, - &mm_config); - 
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); - return 0; + struct pipe_screen *pscreen = &screen->base; + struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; + struct nvc0_fifo nvc0_data = { }; + uint64_t time; + int size, ret; + void *data; + union nouveau_bo_config mm_config; + + char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); + if (nv_dbg) + nouveau_mesa_debug = atoi(nv_dbg); + + /* + * this is initialized to 1 in nouveau_drm_screen_create after screen + * is fully constructed and added to the global screen list. + */ + screen->refcount = -1; + + if (dev->chipset < 0xc0) { + data = &nv04_data; + size = sizeof(nv04_data); + } else { + data = &nvc0_data; + size = sizeof(nvc0_data); + } + + /* + * Set default VRAM domain if not overridden + */ + if (!screen->vram_domain) { + if (dev->vram_size > 0) + screen->vram_domain = NOUVEAU_BO_VRAM; + else + screen->vram_domain = NOUVEAU_BO_GART; + } + + ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &screen->channel); + if (ret) + return ret; + screen->device = dev; + + ret = nouveau_client_new(screen->device, &screen->client); + if (ret) + return ret; + ret = nouveau_pushbuf_new(screen->client, screen->channel, + 4, 512 * 1024, 1, + &screen->pushbuf); + if (ret) + return ret; + + /* getting CPU time first appears to be more accurate */ + screen->cpu_gpu_time_delta = os_time_get(); + + ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time); + if (!ret) + screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000; + + pscreen->get_name = nouveau_screen_get_name; + pscreen->get_vendor = nouveau_screen_get_vendor; + pscreen->get_device_vendor = nouveau_screen_get_device_vendor; + + pscreen->get_timestamp = nouveau_screen_get_timestamp; + + pscreen->fence_reference = nouveau_screen_fence_ref; + pscreen->fence_finish = nouveau_screen_fence_finish; + + util_format_s3tc_init(); + + screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* 
gallium limit */ + screen->vidmem_bindings = + PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | + PIPE_BIND_CURSOR | + PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL; + screen->sysmem_bindings = + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_COMMAND_ARGS_BUFFER; + + memset(&mm_config, 0, sizeof(mm_config)); + + screen->mm_GART = nouveau_mm_create(dev, + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + &mm_config); + screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); + return 0; } void nouveau_screen_fini(struct nouveau_screen *screen) { - nouveau_mm_destroy(screen->mm_GART); - nouveau_mm_destroy(screen->mm_VRAM); + nouveau_mm_destroy(screen->mm_GART); + nouveau_mm_destroy(screen->mm_VRAM); - nouveau_pushbuf_del(&screen->pushbuf); + nouveau_pushbuf_del(&screen->pushbuf); - nouveau_client_del(&screen->client); - nouveau_object_del(&screen->channel); + nouveau_client_del(&screen->client); + nouveau_object_del(&screen->channel); - nouveau_device_del(&screen->device); + nouveau_device_del(&screen->device); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 4fdde9fbf3d..328646fe3ce 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -16,47 +16,47 @@ extern int nouveau_mesa_debug; struct nouveau_bo; struct nouveau_screen { - struct pipe_screen base; - struct nouveau_device *device; - struct nouveau_object *channel; - struct nouveau_client *client; - struct nouveau_pushbuf *pushbuf; + struct pipe_screen base; + struct nouveau_device *device; + struct nouveau_object *channel; + struct nouveau_client *client; + struct nouveau_pushbuf *pushbuf; - int refcount; + int refcount; - unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */ - unsigned sysmem_bindings; /* PIPE_BIND_* where 
GART placement is desired */ - unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */ - /* - * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides - * placement. - */ + unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */ + unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */ + unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */ + /* + * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides + * placement. + */ - uint16_t class_3d; + uint16_t class_3d; - struct { - struct nouveau_fence *head; - struct nouveau_fence *tail; - struct nouveau_fence *current; - u32 sequence; - u32 sequence_ack; - void (*emit)(struct pipe_screen *, u32 *sequence); - u32 (*update)(struct pipe_screen *); - } fence; + struct { + struct nouveau_fence *head; + struct nouveau_fence *tail; + struct nouveau_fence *current; + u32 sequence; + u32 sequence_ack; + void (*emit)(struct pipe_screen *, u32 *sequence); + u32 (*update)(struct pipe_screen *); + } fence; - struct nouveau_mman *mm_VRAM; - struct nouveau_mman *mm_GART; + struct nouveau_mman *mm_VRAM; + struct nouveau_mman *mm_GART; - int64_t cpu_gpu_time_delta; + int64_t cpu_gpu_time_delta; - bool hint_buf_keep_sysmem_copy; + bool hint_buf_keep_sysmem_copy; - unsigned vram_domain; + unsigned vram_domain; - struct { - unsigned profiles_checked; - unsigned profiles_present; - } firmware_info; + struct { + unsigned profiles_checked; + unsigned profiles_present; + } firmware_info; #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { @@ -100,10 +100,10 @@ struct nouveau_screen { #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS # define NOUVEAU_DRV_STAT(s, n, v) do { \ - (s)->stats.named.n += (v); \ + (s)->stats.named.n += (v); \ } while(0) -# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ - nouveau_screen((r)->base.screen)->stats.named.n += (v); \ +# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ + nouveau_screen((r)->base.screen)->stats.named.n += 
(v); \ } while(0) # define NOUVEAU_DRV_STAT_IFD(x) x #else @@ -115,20 +115,20 @@ struct nouveau_screen { static inline struct nouveau_screen * nouveau_screen(struct pipe_screen *pscreen) { - return (struct nouveau_screen *)pscreen; + return (struct nouveau_screen *)pscreen; } bool nouveau_drm_screen_unref(struct nouveau_screen *screen); bool nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, - struct nouveau_bo *bo, - unsigned stride, - struct winsys_handle *whandle); + struct nouveau_bo *bo, + unsigned stride, + struct winsys_handle *whandle); struct nouveau_bo * nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, - struct winsys_handle *whandle, - unsigned *out_stride); + struct winsys_handle *whandle, + unsigned *out_stride); int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); diff --git a/src/gallium/drivers/nouveau/nouveau_statebuf.h b/src/gallium/drivers/nouveau/nouveau_statebuf.h index f38014091ba..da5d7972d9c 100644 --- a/src/gallium/drivers/nouveau/nouveau_statebuf.h +++ b/src/gallium/drivers/nouveau/nouveau_statebuf.h @@ -6,9 +6,9 @@ struct nouveau_statebuf_builder { - uint32_t* p; + uint32_t* p; #ifdef DEBUG - uint32_t* pend; + uint32_t* pend; #endif }; @@ -22,7 +22,7 @@ struct nouveau_statebuf_builder static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size) { - return (size << 18) | (subc << 13) | mthd; + return (size << 18) | (subc << 13) | mthd; } #define sb_method(sb, v, n) sb_data(sb, sb_header(SUBC_3D(v), n)); diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c index e414a534418..8bb12b22ac1 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.c +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -831,7 +831,7 @@ error: static int nouveau_screen_get_video_param(struct pipe_screen *pscreen, enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, + enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param) { switch 
(param) { diff --git a/src/gallium/drivers/nouveau/nouveau_video.h b/src/gallium/drivers/nouveau/nouveau_video.h index fd1bd527deb..3ef6f89ce28 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.h +++ b/src/gallium/drivers/nouveau/nouveau_video.h @@ -83,7 +83,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) static inline void PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, struct nouveau_bo *bo, uint32_t offset, - struct nouveau_bufctx *ctx, int bin, uint32_t rw) + struct nouveau_bufctx *ctx, int bin, uint32_t rw) { nouveau_bufctx_mthd(ctx, bin, NV04_FIFO_PKHDR(subc, mthd, 1), bo, offset, diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h index 33e3bef3df3..58df5ee847f 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h @@ -117,22 +117,22 @@ struct nouveau_vp3_decoder { }; struct comm { - uint32_t bsp_cur_index; // 000 - uint32_t byte_ofs; // 004 - uint32_t status[0x10]; // 008 - uint32_t pos[0x10]; // 048 - uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted - - uint32_t pvp_cur_index; // 100 - uint32_t acked_byte_ofs; // 104 - uint32_t status_vp[0x10]; // 108 - uint16_t mb_y[0x10]; //148 - uint32_t pvp_stage; // 168 0xeeXX - uint16_t parse_endpos_index; // 16c - uint16_t irq_index; // 16e - uint8_t irq_470[0x10]; // 170 - uint32_t irq_pos[0x10]; // 180 - uint32_t parse_endpos[0x10]; // 1c0 + uint32_t bsp_cur_index; // 000 + uint32_t byte_ofs; // 004 + uint32_t status[0x10]; // 008 + uint32_t pos[0x10]; // 048 + uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted + + uint32_t pvp_cur_index; // 100 + uint32_t acked_byte_ofs; // 104 + uint32_t status_vp[0x10]; // 108 + uint16_t mb_y[0x10]; //148 + uint32_t pvp_stage; // 168 0xeeXX + uint16_t parse_endpos_index; // 16c + uint16_t irq_index; // 16e + uint8_t irq_470[0x10]; // 170 + uint32_t irq_pos[0x10]; // 180 + uint32_t parse_endpos[0x10]; // 
1c0 }; static inline uint32_t nouveau_vp3_video_align(uint32_t h) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index 6d968c18399..692772e49d1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -23,90 +23,90 @@ #include "nouveau_vp3_video.h" struct strparm_bsp { - uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi - uint32_t w1[4]; // bit 8-24 addr_lo - uint32_t unk20; // should be idx * 0x8000000, bitstream offset - uint32_t do_crypto_crap; // set to 0 + uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi + uint32_t w1[4]; // bit 8-24 addr_lo + uint32_t unk20; // should be idx * 0x8000000, bitstream offset + uint32_t do_crypto_crap; // set to 0 }; struct mpeg12_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t picture_structure; - uint8_t picture_coding_type; - uint8_t intra_dc_precision; - uint8_t frame_pred_frame_dct; - uint8_t concealment_motion_vectors; - uint8_t intra_vlc_format; - uint16_t pad; - uint8_t f_code[2][2]; + uint16_t width; + uint16_t height; + uint8_t picture_structure; + uint8_t picture_coding_type; + uint8_t intra_dc_precision; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t intra_vlc_format; + uint16_t pad; + uint8_t f_code[2][2]; }; struct mpeg4_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t vop_time_increment_size; - uint8_t interlaced; - uint8_t resync_marker_disable; + uint16_t width; + uint16_t height; + uint8_t vop_time_increment_size; + uint8_t interlaced; + uint8_t resync_marker_disable; }; struct vc1_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t profile; // 04 0 simple, 1 main, 2 advanced - uint8_t postprocflag; // 05 - uint8_t pulldown; // 06 - uint8_t interlaced; // 07 - uint8_t tfcntrflag; // 08 - uint8_t finterpflag; // 09 - uint8_t psf; // 0a - uint8_t pad; // 0b - uint8_t multires; // 0c - uint8_t 
syncmarker; // 0d - uint8_t rangered; // 0e - uint8_t maxbframes; // 0f - uint8_t dquant; // 10 - uint8_t panscan_flag; // 11 - uint8_t refdist_flag; // 12 - uint8_t quantizer; // 13 - uint8_t extended_mv; // 14 - uint8_t extended_dmv; // 15 - uint8_t overlap; // 16 - uint8_t vstransform; // 17 + uint16_t width; + uint16_t height; + uint8_t profile; // 04 0 simple, 1 main, 2 advanced + uint8_t postprocflag; // 05 + uint8_t pulldown; // 06 + uint8_t interlaced; // 07 + uint8_t tfcntrflag; // 08 + uint8_t finterpflag; // 09 + uint8_t psf; // 0a + uint8_t pad; // 0b + uint8_t multires; // 0c + uint8_t syncmarker; // 0d + uint8_t rangered; // 0e + uint8_t maxbframes; // 0f + uint8_t dquant; // 10 + uint8_t panscan_flag; // 11 + uint8_t refdist_flag; // 12 + uint8_t quantizer; // 13 + uint8_t extended_mv; // 14 + uint8_t extended_dmv; // 15 + uint8_t overlap; // 16 + uint8_t vstransform; // 17 }; struct h264_picparm_bsp { - // 00 - uint32_t unk00; - // 04 - uint32_t log2_max_frame_num_minus4; // 04 checked - uint32_t pic_order_cnt_type; // 08 checked - uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked - uint32_t delta_pic_order_always_zero_flag; // 10, or unknown + // 00 + uint32_t unk00; + // 04 + uint32_t log2_max_frame_num_minus4; // 04 checked + uint32_t pic_order_cnt_type; // 08 checked + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked + uint32_t delta_pic_order_always_zero_flag; // 10, or unknown - uint32_t frame_mbs_only_flag; // 14, always 1? - uint32_t direct_8x8_inference_flag; // 18, always 1? - uint32_t width_mb; // 1c checked - uint32_t height_mb; // 20 checked - // 24 - //struct picparm2 - uint32_t entropy_coding_mode_flag; // 00, checked - uint32_t pic_order_present_flag; // 04 checked - uint32_t unk; // 08 seems to be 0? - uint32_t pad1; // 0c seems to be 0? - uint32_t pad2; // 10 always 0 ? - uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? - uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? 
- uint32_t weighted_pred_flag; // 1c checked - uint32_t weighted_bipred_idc; // 20 checked - uint32_t pic_init_qp_minus26; // 24 checked - uint32_t deblocking_filter_control_present_flag; // 28 always 1? - uint32_t redundant_pic_cnt_present_flag; // 2c always 0? - uint32_t transform_8x8_mode_flag; // 30 checked - uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish - uint8_t field_pic_flag; // 38 checked - uint8_t bottom_field_flag; // 39 checked - uint8_t real_pad[0x1b]; // XX why? + uint32_t frame_mbs_only_flag; // 14, always 1? + uint32_t direct_8x8_inference_flag; // 18, always 1? + uint32_t width_mb; // 1c checked + uint32_t height_mb; // 20 checked + // 24 + //struct picparm2 + uint32_t entropy_coding_mode_flag; // 00, checked + uint32_t pic_order_present_flag; // 04 checked + uint32_t unk; // 08 seems to be 0? + uint32_t pad1; // 0c seems to be 0? + uint32_t pad2; // 10 always 0 ? + uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? + uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? + uint32_t weighted_pred_flag; // 1c checked + uint32_t weighted_bipred_idc; // 20 checked + uint32_t pic_init_qp_minus26; // 24 checked + uint32_t deblocking_filter_control_present_flag; // 28 always 1? + uint32_t redundant_pic_cnt_present_flag; // 2c always 0? + uint32_t transform_8x8_mode_flag; // 30 checked + uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish + uint8_t field_pic_flag; // 38 checked + uint8_t bottom_field_flag; // 39 checked + uint8_t real_pad[0x1b]; // XX why? }; static uint32_t diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index 25283b79952..53f5db0003d 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -23,147 +23,147 @@ #include "nouveau_vp3_video.h" struct mpeg12_picparm_vp { - uint16_t width; // 00 in mb units - uint16_t height; // 02 in mb units - - uint32_t unk04; // 04 stride for Y? 
- uint32_t unk08; // 08 stride for CbCr? - - uint32_t ofs[6]; // 1c..20 ofs - uint32_t bucket_size; // 24 - uint32_t inter_ring_data_size; // 28 - uint16_t unk2c; // 2c - uint16_t alternate_scan; // 2e - uint16_t unk30; // 30 not seen set yet - uint16_t picture_structure; // 32 - uint16_t pad2[3]; - uint16_t unk3a; // 3a set on I frame? - - uint32_t f_code[4]; // 3c - uint32_t picture_coding_type; // 4c - uint32_t intra_dc_precision; // 50 - uint32_t q_scale_type; // 54 - uint32_t top_field_first; // 58 - uint32_t full_pel_forward_vector; // 5c - uint32_t full_pel_backward_vector; // 60 - uint8_t intra_quantizer_matrix[0x40]; // 64 - uint8_t non_intra_quantizer_matrix[0x40]; // a4 + uint16_t width; // 00 in mb units + uint16_t height; // 02 in mb units + + uint32_t unk04; // 04 stride for Y? + uint32_t unk08; // 08 stride for CbCr? + + uint32_t ofs[6]; // 1c..20 ofs + uint32_t bucket_size; // 24 + uint32_t inter_ring_data_size; // 28 + uint16_t unk2c; // 2c + uint16_t alternate_scan; // 2e + uint16_t unk30; // 30 not seen set yet + uint16_t picture_structure; // 32 + uint16_t pad2[3]; + uint16_t unk3a; // 3a set on I frame? + + uint32_t f_code[4]; // 3c + uint32_t picture_coding_type; // 4c + uint32_t intra_dc_precision; // 50 + uint32_t q_scale_type; // 54 + uint32_t top_field_first; // 58 + uint32_t full_pel_forward_vector; // 5c + uint32_t full_pel_backward_vector; // 60 + uint8_t intra_quantizer_matrix[0x40]; // 64 + uint8_t non_intra_quantizer_matrix[0x40]; // a4 }; struct mpeg4_picparm_vp { - uint32_t width; // 00 in normal units - uint32_t height; // 04 in normal units - uint32_t unk08; // stride 1 - uint32_t unk0c; // stride 2 - uint32_t ofs[6]; // 10..24 ofs - uint32_t bucket_size; // 28 - uint32_t pad1; // 2c, pad - uint32_t pad2; // 30 - uint32_t inter_ring_data_size; // 34 - - uint32_t trd[2]; // 38, 3c - uint32_t trb[2]; // 40, 44 - uint32_t u48; // XXX codec selection? 
Should test with different values of VdpDecoderProfile - uint16_t f_code_fw; // 4c - uint16_t f_code_bw; // 4e - uint8_t interlaced; // 50 - - uint8_t quant_type; // bool, written to 528 - uint8_t quarter_sample; // bool, written to 548 - uint8_t short_video_header; // bool, negated written to 528 shifted by 1 - uint8_t u54; // bool, written to 0x740 - uint8_t vop_coding_type; // 55 - uint8_t rounding_control; // 56 - uint8_t alternate_vertical_scan_flag; // 57 bool - uint8_t top_field_first; // bool, written to vuc - - uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob - - uint32_t intra[0x10]; // 5c - uint32_t non_intra[0x10]; // 9c - uint32_t pad5[0x10]; // bc what does this do? - // udc..uff pad? + uint32_t width; // 00 in normal units + uint32_t height; // 04 in normal units + uint32_t unk08; // stride 1 + uint32_t unk0c; // stride 2 + uint32_t ofs[6]; // 10..24 ofs + uint32_t bucket_size; // 28 + uint32_t pad1; // 2c, pad + uint32_t pad2; // 30 + uint32_t inter_ring_data_size; // 34 + + uint32_t trd[2]; // 38, 3c + uint32_t trb[2]; // 40, 44 + uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile + uint16_t f_code_fw; // 4c + uint16_t f_code_bw; // 4e + uint8_t interlaced; // 50 + + uint8_t quant_type; // bool, written to 528 + uint8_t quarter_sample; // bool, written to 548 + uint8_t short_video_header; // bool, negated written to 528 shifted by 1 + uint8_t u54; // bool, written to 0x740 + uint8_t vop_coding_type; // 55 + uint8_t rounding_control; // 56 + uint8_t alternate_vertical_scan_flag; // 57 bool + uint8_t top_field_first; // bool, written to vuc + + uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob + + uint32_t intra[0x10]; // 5c + uint32_t non_intra[0x10]; // 9c + uint32_t pad5[0x10]; // bc what does this do? + // udc..uff pad? 
}; // Full version, with data pumped from BSP struct vc1_picparm_vp { - uint32_t bucket_size; // 00 - uint32_t pad; // 04 - - uint32_t inter_ring_data_size; // 08 - uint32_t unk0c; // stride 1 - uint32_t unk10; // stride 2 - uint32_t ofs[6]; // 14..28 ofs - - uint16_t width; // 2c - uint16_t height; // 2e - - uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced - uint8_t loopfilter; // 31 written into vuc - uint8_t fastuvmc; // 32, written into vuc - uint8_t dquant; // 33 - - uint8_t overlap; // 34 - uint8_t quantizer; // 35 - uint8_t u36; // 36, bool - uint8_t pad2; // 37, to align to 0x38 + uint32_t bucket_size; // 00 + uint32_t pad; // 04 + + uint32_t inter_ring_data_size; // 08 + uint32_t unk0c; // stride 1 + uint32_t unk10; // stride 2 + uint32_t ofs[6]; // 14..28 ofs + + uint16_t width; // 2c + uint16_t height; // 2e + + uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced + uint8_t loopfilter; // 31 written into vuc + uint8_t fastuvmc; // 32, written into vuc + uint8_t dquant; // 33 + + uint8_t overlap; // 34 + uint8_t quantizer; // 35 + uint8_t u36; // 36, bool + uint8_t pad2; // 37, to align to 0x38 }; struct h264_picparm_vp { // 700..a00 - uint16_t width, height; - uint32_t stride1, stride2; // 04 08 - uint32_t ofs[6]; // 0c..24 in-image offset - - uint32_t tmp_stride; - uint32_t bucket_size; // 28 bucket size - uint32_t inter_ring_data_size; // 2c - - unsigned mb_adaptive_frame_field_flag : 1; // 0 - unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56 - unsigned weighted_pred_flag : 1; // 2 0x04 - unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68 - unsigned is_reference : 1; // 4 - unsigned interlace : 1; // 5 field_pic_flag - unsigned bottom_field_flag : 1; // 6 - unsigned second_field : 1; // 7 0x80: nfi yet - - signed log2_max_frame_num_minus4 : 4; // 31 0..3 - unsigned chroma_format_idc : 2; // 31 4..5 - unsigned pic_order_cnt_type : 2; // 31 6..7 - signed pic_init_qp_minus26 : 6; // 32 0..5 - signed 
chroma_qp_index_offset : 5; // 32 6..10 - signed second_chroma_qp_index_offset : 5; // 32 11..15 - - unsigned weighted_bipred_idc : 2; // 34 0..1 - unsigned fifo_dec_index : 7; // 34 2..8 - unsigned tmp_idx : 5; // 34 9..13 - unsigned frame_number : 16; // 34 14..29 - unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30] - unsigned u34_3131 : 1; // 34 31..31 pad? - - uint32_t field_order_cnt[2]; // 38, 3c - - struct { // 40 - unsigned fifo_idx : 7; // 00 0..6 - unsigned tmp_idx : 5; // 00 7..11 - unsigned top_is_reference : 1; // 00 12 - unsigned bottom_is_reference : 1; // 00 13 - unsigned is_long_term : 1; // 00 14 - unsigned notseenyet : 1; // 00 15 pad? - unsigned field_pic_flag : 1; // 00 16 - unsigned top_field_marking : 4; // 00 17..20 - unsigned bottom_field_marking : 4; // 00 21..24 - unsigned pad : 7; // 00 d25..31 - - uint32_t field_order_cnt[2]; // 04,08 - uint32_t frame_idx; // 0c - } refs[0x10]; - - uint8_t m4x4[6][16]; // 140 - uint8_t m8x8[2][64]; // 1a0 - uint32_t u220; // 220 number of extra reorder_list to append? - uint8_t u224[0x20]; // 224..244 reorder_list append ? 
- uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read + uint16_t width, height; + uint32_t stride1, stride2; // 04 08 + uint32_t ofs[6]; // 0c..24 in-image offset + + uint32_t tmp_stride; + uint32_t bucket_size; // 28 bucket size + uint32_t inter_ring_data_size; // 2c + + unsigned mb_adaptive_frame_field_flag : 1; // 0 + unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56 + unsigned weighted_pred_flag : 1; // 2 0x04 + unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68 + unsigned is_reference : 1; // 4 + unsigned interlace : 1; // 5 field_pic_flag + unsigned bottom_field_flag : 1; // 6 + unsigned second_field : 1; // 7 0x80: nfi yet + + signed log2_max_frame_num_minus4 : 4; // 31 0..3 + unsigned chroma_format_idc : 2; // 31 4..5 + unsigned pic_order_cnt_type : 2; // 31 6..7 + signed pic_init_qp_minus26 : 6; // 32 0..5 + signed chroma_qp_index_offset : 5; // 32 6..10 + signed second_chroma_qp_index_offset : 5; // 32 11..15 + + unsigned weighted_bipred_idc : 2; // 34 0..1 + unsigned fifo_dec_index : 7; // 34 2..8 + unsigned tmp_idx : 5; // 34 9..13 + unsigned frame_number : 16; // 34 14..29 + unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30] + unsigned u34_3131 : 1; // 34 31..31 pad? + + uint32_t field_order_cnt[2]; // 38, 3c + + struct { // 40 + unsigned fifo_idx : 7; // 00 0..6 + unsigned tmp_idx : 5; // 00 7..11 + unsigned top_is_reference : 1; // 00 12 + unsigned bottom_is_reference : 1; // 00 13 + unsigned is_long_term : 1; // 00 14 + unsigned notseenyet : 1; // 00 15 pad? + unsigned field_pic_flag : 1; // 00 16 + unsigned top_field_marking : 4; // 00 17..20 + unsigned bottom_field_marking : 4; // 00 21..24 + unsigned pad : 7; // 00 d25..31 + + uint32_t field_order_cnt[2]; // 04,08 + uint32_t frame_idx; // 0c + } refs[0x10]; + + uint8_t m4x4[6][16]; // 140 + uint8_t m8x8[2][64]; // 1a0 + uint32_t u220; // 220 number of extra reorder_list to append? + uint8_t u224[0x20]; // 224..244 reorder_list append ? 
+ uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read }; static void diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index a44fd3efcf7..1319c3290cf 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -65,18 +65,18 @@ PUSH_KICK(struct nouveau_pushbuf *push) static inline uint32_t nouveau_screen_transfer_flags(unsigned pipe) { - uint32_t flags = 0; - - if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) { - if (pipe & PIPE_TRANSFER_READ) - flags |= NOUVEAU_BO_RD; - if (pipe & PIPE_TRANSFER_WRITE) - flags |= NOUVEAU_BO_WR; - if (pipe & PIPE_TRANSFER_DONTBLOCK) - flags |= NOUVEAU_BO_NOBLOCK; - } - - return flags; + uint32_t flags = 0; + + if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) { + if (pipe & PIPE_TRANSFER_READ) + flags |= NOUVEAU_BO_RD; + if (pipe & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + if (pipe & PIPE_TRANSFER_DONTBLOCK) + flags |= NOUVEAU_BO_NOBLOCK; + } + + return flags; } extern struct pipe_screen * diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 03301649e38..bdecb0a32b3 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 69c121274a9..fb74a9748a3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -16,6 +16,7 @@ #include "nv50/nv50_program.h" #include "nv50/nv50_resource.h" #include "nv50/nv50_transfer.h" +#include "nv50/nv50_query.h" #include 
"nouveau_context.h" #include "nouveau_debug.h" @@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *); /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_query.c */ -void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method, - struct pipe_query *, unsigned result_offset); -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); -void nva0_so_target_save_offset(struct pipe_context *, - struct pipe_stream_output_target *, - unsigned index, bool seralize); - -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) - /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index eff4477472c..299629b6438 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) info->io.ucpCBSlot = 15; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; - info->io.sampleInterp = prog->fp.sample_interp; info->io.resInfoCBSlot = 15; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; @@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) prog->code = info->bin.code; prog->code_size = info->bin.codeSize; prog->fixups = info->bin.relocData; + prog->interps = info->bin.interpData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; @@ -420,8 +420,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; - case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; - case 
PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) if (prog->fixups) nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + if (prog->interps) + nv50_ir_change_interp(prog->interps, prog->code, + prog->fp.force_persample_interp, + false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index f4e8e9402ca..24cc96567d7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -86,7 +86,7 @@ struct nv50_program { uint32_t interp; /* 0x1988 */ uint32_t colors; /* 0x1904 */ uint8_t has_samplemask; - uint8_t sample_interp; + uint8_t force_persample_interp; } fp; struct { @@ -99,6 +99,7 @@ struct nv50_program { } gp; void *fixups; /* relocation records */ + void *interps; /* interpolation records */ struct nouveau_heap *mem; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 5368ee73750..dd9b85b7208 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -25,356 +25,46 @@ #define NV50_PUSH_EXPLICIT_SPACE_CHECKING #include "nv50/nv50_context.h" -#include "nv_object.xml.h" - -#define NV50_QUERY_STATE_READY 0 -#define NV50_QUERY_STATE_ACTIVE 1 -#define NV50_QUERY_STATE_ENDED 2 -#define NV50_QUERY_STATE_FLUSHED 3 - -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts - * (since we use only a single GPU channel per screen) will not work properly. 
- * - * The first is not that big of an issue because OpenGL does not allow nested - * queries anyway. - */ - -struct nv50_query { - uint32_t *data; - uint16_t type; - uint16_t index; - uint32_t sequence; - struct nouveau_bo *bo; - uint32_t base; - uint32_t offset; /* base + i * 32 */ - uint8_t state; - bool is64bit; - int nesting; /* only used for occlusion queries */ - struct nouveau_mm_allocation *mm; - struct nouveau_fence *fence; -}; - -#define NV50_QUERY_ALLOC_SPACE 256 - -static inline struct nv50_query * -nv50_query(struct pipe_query *pipe) -{ - return (struct nv50_query *)pipe; -} - -static bool -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) -{ - struct nv50_screen *screen = nv50->screen; - int ret; - - if (q->bo) { - nouveau_bo_ref(NULL, &q->bo); - if (q->mm) { - if (q->state == NV50_QUERY_STATE_READY) - nouveau_mm_free(q->mm); - else - nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, - q->mm); - } - } - if (size) { - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); - if (!q->bo) - return false; - q->offset = q->base; - - ret = nouveau_bo_map(q->bo, 0, screen->base.client); - if (ret) { - nv50_query_allocate(nv50, q, 0); - return false; - } - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); - } - return true; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); -} +#include "nv50/nv50_query.h" +#include "nv50/nv50_query_hw.h" static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q; - q = CALLOC_STRUCT(nv50_query); - if (!q) - return NULL; - - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { - 
FREE(q); - return NULL; - } - - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - q->type = type; - - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 32; - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ - } - + q = nv50_hw_create_query(nv50, type, index); return (struct pipe_query *)q; } static void -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, - unsigned offset, uint32_t get) +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) { - offset += q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, get); + struct nv50_query *q = nv50_query(pq); + q->funcs->destroy_query(nv50_context(pipe), q); } static boolean -nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - /* For occlusion queries we have to change the storage, because a previous - * query might set the initial render conition to false even *after* we re- - * initialized it to true. - */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset += 32; - q->data += 32 / sizeof(*q->data); - if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) - nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); - - /* XXX: can we do this with the GPU, and sync with respect to a previous - * query ? 
- */ - q->data[0] = q->sequence; /* initialize sequence */ - q->data[1] = 1; /* initial render condition = true */ - q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ - q->data[5] = 0; - } - if (!q->is64bit) - q->data[0] = q->sequence++; /* the previously used one */ - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - q->nesting = nv50->screen->num_occlusion_queries_active++; - if (q->nesting) { - nv50_query_get(push, q, 0x10, 0x0100f002); - } else { - PUSH_SPACE(push, 4); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 1); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0x10, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x20, 0x05805002); - nv50_query_get(push, q, 0x30, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0x10, 0x00005002); - break; - default: - break; - } - q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return q->funcs->begin_query(nv50_context(pipe), q); } static void -nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct 
nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - q->state = NV50_QUERY_STATE_ENDED; - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - nv50_query_get(push, q, 0, 0x0100f002); - if (--nv50->screen->num_occlusion_queries_active == 0) { - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 0); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x00, 0x05805002); - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIMESTAMP: - q->sequence++; - /* fall through */ - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0, 0x00005002); - break; - case PIPE_QUERY_GPU_FINISHED: - q->sequence++; - nv50_query_get(push, q, 0, 0x1000f010); - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - q->sequence++; - nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* This query is not issued on GPU because disjoint is forced to false */ - q->state = NV50_QUERY_STATE_READY; - break; - default: - assert(0); - break; - } - - if (q->is64bit) - nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); -} - -static inline void -nv50_query_update(struct 
nv50_query *q) -{ - if (q->is64bit) { - if (nouveau_fence_signalled(q->fence)) - q->state = NV50_QUERY_STATE_READY; - } else { - if (q->data[0] == q->sequence) - q->state = NV50_QUERY_STATE_READY; - } + q->funcs->end_query(nv50_context(pipe), q); } static boolean -nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, union pipe_query_result *result) +nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) { - struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q = nv50_query(pq); - uint64_t *res64 = (uint64_t *)result; - uint32_t *res32 = (uint32_t *)result; - uint8_t *res8 = (uint8_t *)result; - uint64_t *data64 = (uint64_t *)q->data; - int i; - - if (q->state != NV50_QUERY_STATE_READY) - nv50_query_update(q); - - if (q->state != NV50_QUERY_STATE_READY) { - if (!wait) { - /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ - if (q->state != NV50_QUERY_STATE_FLUSHED) { - q->state = NV50_QUERY_STATE_FLUSHED; - PUSH_KICK(nv50->base.pushbuf); - } - return false; - } - if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) - return false; - } - q->state = NV50_QUERY_STATE_READY; - - switch (q->type) { - case PIPE_QUERY_GPU_FINISHED: - res8[0] = true; - break; - case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ - res64[0] = q->data[1] - q->data[5]; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ - case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ - res64[0] = data64[0] - data64[2]; - break; - case PIPE_QUERY_SO_STATISTICS: - res64[0] = data64[0] - data64[4]; - res64[1] = data64[2] - data64[6]; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - for (i = 0; i < 8; ++i) - res64[i] = data64[i * 2] - data64[16 + i * 2]; - break; - case PIPE_QUERY_TIMESTAMP: - res64[0] = data64[1]; - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - res64[0] = 1000000000; - res8[8] = false; - break; - 
case PIPE_QUERY_TIME_ELAPSED: - res64[0] = data64[1] - data64[3]; - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - res32[0] = q->data[1]; - break; - default: - return false; - } - - return true; -} - -void -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) -{ - struct nv50_query *q = nv50_query(pq); - unsigned offset = q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); + return q->funcs->get_query_result(nv50_context(pipe), q, wait, result); } static void @@ -384,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_query *q; + struct nv50_query *q = nv50_query(pq); + struct nv50_hw_query *hq = nv50_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && @@ -394,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe, cond = NV50_3D_COND_MODE_ALWAYS; } else { - q = nv50_query(pq); /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: @@ -405,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { - if (unlikely(q->nesting)) + if (unlikely(hq->nesting)) cond = wait ? 
NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; else @@ -440,48 +130,15 @@ nv50_render_condition(struct pipe_context *pipe, PUSH_DATA (push, 0); } - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); -} - -void -nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, - struct pipe_query *pq, unsigned result_offset) -{ - struct nv50_query *q = nv50_query(pq); - - nv50_query_update(q); - if (q->state != NV50_QUERY_STATE_READY) - nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); - q->state = NV50_QUERY_STATE_READY; - - BEGIN_NV04(push, SUBC_3D(method), 1); - PUSH_DATA (push, q->data[result_offset / 4]); -} - -void -nva0_so_target_save_offset(struct pipe_context *pipe, - struct pipe_stream_output_target *ptarg, - unsigned index, bool serialize) -{ - struct nv50_so_target *targ = nv50_so_target(ptarg); - - if (serialize) { - struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; - PUSH_SPACE(push, 2); - BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); - PUSH_DATA (push, 0); - } - - nv50_query(targ->pq)->index = index; - nv50_query_end(pipe, targ->pq); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); } void @@ -489,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; - pipe->create_query = nv50_query_create; - pipe->destroy_query = nv50_query_destroy; - pipe->begin_query = nv50_query_begin; - pipe->end_query = nv50_query_end; - pipe->get_query_result = 
nv50_query_result; + pipe->create_query = nv50_create_query; + pipe->destroy_query = nv50_destroy_query; + pipe->begin_query = nv50_begin_query; + pipe->end_query = nv50_end_query; + pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h new file mode 100644 index 00000000000..d990285c857 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -0,0 +1,33 @@ +#ifndef __NV50_QUERY_H__ +#define __NV50_QUERY_H__ + +#include "pipe/p_context.h" + +#include "nouveau_context.h" + +struct nv50_context; +struct nv50_query; + +struct nv50_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_query *); + void (*end_query)(struct nv50_context *, struct nv50_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_query *, + boolean, union pipe_query_result *); +}; + +struct nv50_query { + const struct nv50_query_funcs *funcs; + uint16_t type; + uint16_t index; +}; + +static inline struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + +void nv50_init_query_functions(struct nv50_context *); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c new file mode 100644 index 00000000000..945ce7abe50 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -0,0 +1,406 @@ +/* + * Copyright 2011 Christoph Bumiller + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" +#include "nv_object.xml.h" + +#define NV50_HW_QUERY_STATE_READY 0 +#define NV50_HW_QUERY_STATE_ACTIVE 1 +#define NV50_HW_QUERY_STATE_ENDED 2 +#define NV50_HW_QUERY_STATE_FLUSHED 3 + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. 
+ */ + +#define NV50_HW_QUERY_ALLOC_SPACE 256 + +static bool +nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, + int size) +{ + struct nv50_screen *screen = nv50->screen; + struct nv50_hw_query *hq = nv50_hw_query(q); + int ret; + + if (hq->bo) { + nouveau_bo_ref(NULL, &hq->bo); + if (hq->mm) { + if (hq->state == NV50_HW_QUERY_STATE_READY) + nouveau_mm_free(hq->mm); + else + nouveau_fence_work(screen->base.fence.current, + nouveau_mm_free_work, hq->mm); + } + } + if (size) { + hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, + &hq->bo, &hq->base_offset); + if (!hq->bo) + return false; + hq->offset = hq->base_offset; + + ret = nouveau_bo_map(hq->bo, 0, screen->base.client); + if (ret) { + nv50_hw_query_allocate(nv50, q, 0); + return false; + } + hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset); + } + return true; +} + +static void +nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, + unsigned offset, uint32_t get) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + offset += hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, get); +} + +static inline void +nv50_hw_query_update(struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + if (hq->is64bit) { + if (nouveau_fence_signalled(hq->fence)) + hq->state = NV50_HW_QUERY_STATE_READY; + } else { + if (hq->data[0] == hq->sequence) + hq->state = NV50_HW_QUERY_STATE_READY; + } +} + +static void +nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + nv50_hw_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &hq->fence); + FREE(hq); +} + +static boolean +nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) +{ + 
struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render condition to false even *after* we re- + * initialized it to true. + */ + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); + if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) + nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + hq->data[0] = hq->sequence; /* initialize sequence */ + hq->data[1] = 1; /* initial render condition = true */ + hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */ + hq->data[5] = 0; + } + if (!hq->is64bit) + hq->data[0] = hq->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->nesting = nv50->screen->num_occlusion_queries_active++; + if (hq->nesting) { + nv50_hw_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0x10, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x20, 0x05805002); + nv50_hw_query_get(push, q, 0x30, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, 
PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0x10, 0x00005002); + break; + default: + assert(0); + return false; + } + hq->state = NV50_HW_QUERY_STATE_ACTIVE; + return true; +} + +static void +nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + hq->state = NV50_HW_QUERY_STATE_ENDED; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nv50_hw_query_get(push, q, 0, 0x0100f002); + if (--nv50->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x05805002); + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIMESTAMP: + hq->sequence++; + /* fall through */ + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0, 0x00005002); + break; + 
case PIPE_QUERY_GPU_FINISHED: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x1000f010); + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* This query is not issued on GPU because disjoint is forced to false */ + hq->state = NV50_HW_QUERY_STATE_READY; + break; + default: + assert(0); + break; + } + if (hq->is64bit) + nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence); +} + +static boolean +nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, + boolean wait, union pipe_query_result *result) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; + uint8_t *res8 = (uint8_t *)result; + uint64_t *data64 = (uint64_t *)hq->data; + int i; + + if (hq->state != NV50_HW_QUERY_STATE_READY) + nv50_hw_query_update(q); + + if (hq->state != NV50_HW_QUERY_STATE_READY) { + if (!wait) { + /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ + if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) { + hq->state = NV50_HW_QUERY_STATE_FLUSHED; + PUSH_KICK(nv50->base.pushbuf); + } + return false; + } + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) + return false; + } + hq->state = NV50_HW_QUERY_STATE_READY; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = true; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = hq->data[1] - hq->data[5]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + for (i = 0; i < 8; ++i) + res64[i] = data64[i * 2] - data64[16 + i * 2]; + break; 
+ case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = false; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = hq->data[1]; + break; + default: + assert(0); + return false; + } + + return true; +} + +static const struct nv50_query_funcs hw_query_funcs = { + .destroy_query = nv50_hw_destroy_query, + .begin_query = nv50_hw_begin_query, + .end_query = nv50_hw_end_query, + .get_query_result = nv50_hw_get_query_result, +}; + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) +{ + struct nv50_hw_query *hq; + struct nv50_query *q; + + hq = CALLOC_STRUCT(nv50_hw_query); + if (!hq) + return NULL; + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = type; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + + if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + FREE(hq); + return NULL; + } + + if (hq->rotate) { + /* we advance before query_begin ! 
*/ + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); + } + + return q; +} + +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, + struct nv50_query *q, unsigned result_offset) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + nv50_hw_query_update(q); + if (hq->state != NV50_HW_QUERY_STATE_READY) + nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client); + hq->state = NV50_HW_QUERY_STATE_READY; + + BEGIN_NV04(push, SUBC_3D(method), 1); + PUSH_DATA (push, hq->data[result_offset / 4]); +} + +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + unsigned offset = hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h new file mode 100644 index 00000000000..294c67de9a4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -0,0 +1,40 @@ +#ifndef __NV50_QUERY_HW_H__ +#define __NV50_QUERY_HW_H__ + +#include "nouveau_fence.h" +#include "nouveau_mm.h" + +#include "nv50_query.h" + +#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +struct nv50_hw_query { + struct nv50_query base; + uint32_t *data; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base_offset; + uint32_t offset; /* base + i * rotate */ + uint8_t state; + bool is64bit; + uint8_t rotate; + int nesting; /* only used for occlusion queries */ + struct nouveau_mm_allocation *mm; + struct nouveau_fence *fence; +}; + +static inline struct nv50_hw_query * +nv50_hw_query(struct nv50_query *q) +{ + return (struct nv50_hw_query 
*)q; +} + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, + struct nv50_query *, unsigned); +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c index d289b4a24e8..325c19fb80c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c @@ -32,8 +32,8 @@ nv50_resource_from_handle(struct pipe_screen * screen, struct pipe_surface * nv50_surface_from_buffer(struct pipe_context *pipe, - struct pipe_resource *pbuf, - const struct pipe_surface *templ) + struct pipe_resource *pbuf, + const struct pipe_surface *templ) { struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface); if (!sf) @@ -65,8 +65,8 @@ nv50_surface_from_buffer(struct pipe_context *pipe, static struct pipe_surface * nv50_surface_create(struct pipe_context *pipe, - struct pipe_resource *pres, - const struct pipe_surface *templ) + struct pipe_resource *pres, + const struct pipe_surface *templ) { /* surfaces are assumed to be miptrees all over the place. 
*/ assert(pres->target != PIPE_BUFFER); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ec51d00f266..a9e0c478322 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -180,6 +180,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_SHAREABLE_SHADERS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -191,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return class_3d >= NVA3_3D_CLASS; /* unsupported caps */ @@ -215,8 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 941555ffbf8..9b911043132 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" void nv50_constbufs_validate(struct nv50_context *nv50) @@ -168,11 +169,23 @@ nv50_fragprog_validate(struct nv50_context *nv50) { struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_program *fp = nv50->fragprog; + struct pipe_rasterizer_state *rast = &nv50->rast->pipe; - fp->fp.sample_interp = nv50->min_samples > 1; + if 
(fp->fp.force_persample_interp != rast->force_persample_interp) { + /* Force the program to be reuploaded, which will trigger interp fixups + * to get applied + */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.force_persample_interp = rast->force_persample_interp; + } + + if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES))) + return; if (!nv50_program_validate(nv50, fp)) - return; + return; nv50_program_update_context_state(nv50, fp, 1); BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1); @@ -629,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; if (n == 4 && !targ->clean) - nv84_query_fifo_wait(push, targ->pq); + nv84_hw_query_fifo_wait(push, nv50_query(targ->pq)); BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); @@ -638,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50) PUSH_DATA(push, targ->pipe.buffer_size); if (!targ->clean) { assert(targ->pq); - nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), - targ->pq, 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), + nv50_query(targ->pq), 0x4); } else { BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); PUSH_DATA(push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 410e6311e60..6c8c9f0b4e6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -30,6 +30,7 @@ #include "nv50/nv50_stateobj.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_texture.xml.h" @@ -725,6 +726,9 @@ nv50_sp_state_create(struct pipe_context *pipe, if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; + prog->translated = nv50_program_translate( + 
prog, nv50_context(pipe)->screen->base.device->chipset); + return (void *)prog; } @@ -1033,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe, if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { targ->pq = pipe->create_query(pipe, - NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); + NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); if (!targ->pq) { FREE(targ); return NULL; @@ -1057,6 +1061,24 @@ nv50_so_target_create(struct pipe_context *pipe, } static void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, bool serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + pipe->end_query(pipe, targ->pq); +} + +static void nv50_so_target_destroy(struct pipe_context *pipe, struct pipe_stream_output_target *ptarg) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 66dcf43533b..b6181edf24f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -487,7 +487,7 @@ static struct state_validate { { nv50_validate_viewport, NV50_NEW_VIEWPORT }, { nv50_vertprog_validate, NV50_NEW_VERTPROG }, { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, - { nv50_fragprog_validate, NV50_NEW_FRAGPROG | + { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_MIN_SAMPLES }, { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 64348b3c378..237d76d6adb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ 
b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe, nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; if (m2mf) { + struct nv50_miptree *src_mt = nv50_miptree(src); + struct nv50_miptree *dst_mt = nv50_miptree(dst); struct nv50_m2mf_rect drect, srect; unsigned i; - unsigned nx = util_format_get_nblocksx(src->format, src_box->width); - unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + unsigned nx = util_format_get_nblocksx(src->format, src_box->width) + << src_mt->ms_x; + unsigned ny = util_format_get_nblocksy(src->format, src_box->height) + << src_mt->ms_y; nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); nv50_m2mf_rect_setup(&srect, src, src_level, @@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe, for (i = 0; i < src_box->depth; ++i) { nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny); - if (nv50_miptree(dst)->layout_3d) + if (dst_mt->layout_3d) drect.z++; else - drect.base += nv50_miptree(dst)->layer_stride; + drect.base += dst_mt->layer_stride; - if (nv50_miptree(src)->layout_3d) + if (src_mt->layout_3d) srect.z++; else - srect.base += nv50_miptree(src)->layer_stride; + srect.base += src_mt->layer_stride; } return; } @@ -270,7 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe, static void nv50_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, - const union pipe_color_union *color, + const union pipe_color_union *color, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index f5f47087bef..9fa6fceeefa 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -27,6 +27,7 @@ #include "translate/translate.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_resource.h" #include 
"nv50/nv50_3d.xml.h" @@ -745,7 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50, PUSH_DATA (push, 0); BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); PUSH_DATA (push, so->stride); - nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, + nv50_query(so->pq), 0x4); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c index 7780a179399..d13480c21d5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c @@ -27,33 +27,33 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, struct nouveau_bo *inter_bo, unsigned slice_size) { - unsigned i, idx = comm->pvp_cur_index & 0xf; - debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); #if 0 - debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); - debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); - for (i = 0; i != comm->irq_index; ++i) - debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); - for (i = 0; i != comm->parse_endpos_index; ++i) - debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); 
#endif - debug_printf("mb_y = %u\n", comm->mb_y[idx]); - if (comm->status_vp[idx] <= 1) - return; - - if ((comm->pvp_stage & 0xff) != 0xff) { - unsigned *map; - int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); - assert(ret >= 0); - map = inter_bo->map; - for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { - debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); - } - munmap(inter_bo->map, inter_bo->size); - inter_bo->map = NULL; - } - assert((comm->pvp_stage & 0xff) == 0xff); + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] <= 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); + assert(ret >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); } #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index a168dd684ab..68048f9d6c0 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) } } - vp->vp.clip_enable = info->io.clipDistanceMask; - for (i = 0; i < 8; ++i) - if (info->io.cullDistanceMask & (1 << i)) - vp->vp.clip_mode |= 1 << (i * 4); + vp->vp.clip_enable = + (1 << (info->io.clipDistances + info->io.cullDistances)) - 1; + for (i = 0; i < info->io.cullDistances; ++i) + vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); if (info->io.genUserClip < 0) vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ @@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, 
struct nv50_ir_prog_info *info) vp->hdr[0] = 0x20061 | (1 << 10); vp->hdr[4] = 0xff000; - vp->hdr[18] = info->io.clipDistanceMask; - return nvc0_vtgp_gen_header(vp, info); } @@ -424,6 +422,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) for (i = 0; i < info->numInputs; ++i) { m = nvc0_hdr_interp_mode(&info->in[i]); + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { + fp->fp.colors |= 1 << info->in[i].si; + if (info->in[i].sc) + fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); + } for (c = 0; c < 4; ++c) { if (!(info->in[i].mask & (1 << c))) continue; @@ -531,7 +534,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.genUserClip = prog->vp.num_ucps; info->io.ucpBase = 256; info->io.ucpCBSlot = 15; - info->io.sampleInterp = prog->fp.sample_interp; if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { @@ -575,6 +577,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) prog->immd_data = info->immd.buf; prog->immd_size = info->immd.bufSize; prog->relocs = info->bin.relocData; + prog->interps = info->bin.interpData; prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); prog->num_barriers = info->numBarriers; @@ -713,6 +716,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->relocs) nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + if (prog->interps) { + nv50_ir_change_interp(prog->interps, prog->code, + prog->fp.force_persample_interp, + prog->fp.flatshade); + for (int i = 0; i < 2; i++) { + unsigned mask = prog->fp.color_interp[i] >> 4; + unsigned interp = prog->fp.color_interp[i] & 3; + if (!mask) + continue; + prog->hdr[14] &= ~(0xff << (8 * i)); + if (prog->fp.flatshade) + interp = NVC0_INTERP_FLAT; + for (int c = 0; c < 4; c++) + if (mask & (1 << c)) + prog->hdr[14] |= interp << (2 * (4 * i + c)); + } + } #ifdef DEBUG if (debug_get_bool_option("NV50_PROG_DEBUG", false)) @@ -773,6 
+793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) FREE(prog->code); /* may be 0 for hardcoded shaders */ FREE(prog->immd_data); FREE(prog->relocs); + FREE(prog->interps); if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) FREE(prog->cp.syms); if (prog->tfb) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 390e0c7a4f0..9c45e7b3e31 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -45,8 +45,10 @@ struct nvc0_program { } vp; struct { uint8_t early_z; - uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; - uint8_t sample_interp; + uint8_t colors; + uint8_t color_interp[2]; + bool force_persample_interp; + bool flatshade; } fp; struct { uint32_t tess_mode; /* ~0 if defined by the other stage */ @@ -61,6 +63,7 @@ struct nvc0_program { uint8_t num_barriers; void *relocs; + void *interps; struct nvc0_transform_feedback_state *tfb; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index e4752e2dbc5..f53921092a5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -28,6 +28,7 @@ #include "nvc0/nvc0_query.h" #include "nvc0/nvc0_query_sw.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" static struct pipe_query * @@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; } else if (screen->base.class_3d < NVE4_3D_CLASS) { - count++; + count += 2; } } } @@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return 1; } } + } else + if (id == NVC0_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d < NVE4_3D_CLASS) { + info->name = "Performance metrics"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + info->max_active_queries = 1; + 
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; + return 1; + } + } } #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index 6883ab6ab9d..c46361c31aa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe) * Driver queries groups: */ #define NVC0_HW_SM_QUERY_GROUP 0 -#define NVC0_SW_QUERY_DRV_STAT_GROUP 1 +#define NVC0_HW_METRIC_QUERY_GROUP 1 +#define NVC0_SW_QUERY_DRV_STAT_GROUP 2 void nvc0_init_query_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index 25aa09be42a..fb2806a805e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, id = nvc0_hw_metric_get_next_query_id(queries, id); info->name = nvc0_hw_metric_names[id]; info->query_type = NVC0_HW_METRIC_QUERY(id); - info->group_id = -1; + info->group_id = NVC0_HW_METRIC_QUERY_GROUP; return 1; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c index 12b5a025064..15c803c4307 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c @@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen, } else { struct pipe_resource *res = nv50_miptree_from_handle(screen, templ, whandle); - nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + if (res) + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; return res; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index af8e5f72670..6ad3980911d 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -179,6 +179,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; @@ -201,8 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: @@ -352,45 +353,51 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - uint64_t *data64 = (uint64_t *)data; - uint32_t *data32 = (uint32_t *)data; - const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + struct nvc0_screen *screen = nvc0_screen(pscreen); + const uint16_t obj_class = screen->compute->oclass; + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) switch (param) { case PIPE_COMPUTE_CAP_GRID_DIMENSION: - data64[0] = 3; - return 8; + RET((uint64_t []) { 3 }); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 
0x7fffffff : 65535; - data64[1] = 65535; - data64[2] = 65535; - return 24; + if (obj_class >= NVE4_COMPUTE_CLASS) { + RET(((uint64_t []) { 0x7fffffff, 65535, 65535 })); + } else { + RET(((uint64_t []) { 65535, 65535, 65535 })); + } case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - data64[0] = 1024; - data64[1] = 1024; - data64[2] = 64; - return 24; + RET(((uint64_t []) { 1024, 1024, 64 })); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - data64[0] = 1024; - return 8; + RET((uint64_t []) { 1024 }); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ - data64[0] = (uint64_t)1 << 40; - return 8; + RET((uint64_t []) { 1ULL << 40 }); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ - data64[0] = 48 << 10; - return 8; + RET((uint64_t []) { 48 << 10 }); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ - data64[0] = 512 << 10; - return 8; + RET((uint64_t []) { 512 << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ - data64[0] = 4096; - return 8; + RET((uint64_t []) { 4096 }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - data32[0] = 32; - return 4; + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count_compute }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ default: return 0; } + +#undef RET } static void @@ -827,6 +834,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 1); BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); + PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); if (screen->eng3d->oclass < NVE4_3D_CLASS) { BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 
857eb0316c7..8b73102b98b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -38,6 +38,7 @@ struct nvc0_graph_state { uint32_t constant_elts; int32_t index_bias; uint16_t scissor; + bool flatshade; uint8_t patch_vertices; uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */ uint8_t num_vtxbufs; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index af837fc4a33..8595800592c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; + struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; - fp->fp.sample_interp = nvc0->min_samples > 1; + if (fp->fp.force_persample_interp != rast->force_persample_interp) { + /* Force the program to be reuploaded, which will trigger interp fixups + * to get applied + */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.force_persample_interp = rast->force_persample_interp; + } + + /* Shade model works well enough when both colors follow it. However if one + * (or both) is explicitly set, then we have to go the patching route. + */ + bool has_explicit_color = fp->fp.colors && + (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) || + ((fp->fp.colors & 2) && !fp->fp.color_interp[1])); + bool hwflatshade = false; + if (has_explicit_color && fp->fp.flatshade != rast->flatshade) { + /* Force re-upload */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.flatshade = rast->flatshade; + + /* Always smooth-shade in this mode, the shader will decide on its own + * when to flat-shade. 
+ */ + } else if (!has_explicit_color) { + hwflatshade = rast->flatshade; + + /* No need to binary-patch the shader each time, make sure that it's set + * up for the default behaviour. + */ + fp->fp.flatshade = 0; + } + + if (hwflatshade != nvc0->state.flatshade) { + nvc0->state.flatshade = hwflatshade; + BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); + PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + } + + if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) { + return; + } if (!nvc0_program_validate(nvc0, fp)) return; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 742bef39247..ba1714da010 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, * always emit 16 commands, one for each scissor rectangle, here. */ - SB_BEGIN_3D(so, SHADE_MODEL, 1); - SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : - NVC0_3D_SHADE_MODEL_SMOOTH); SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); @@ -683,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe, if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; + prog->translated = nvc0_program_translate( + prog, nvc0_context(pipe)->screen->base.device->chipset); + return (void *)prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index aec06097bbd..205e7dc6ae9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1; } + /* Reset tfb as the shader that owns it may have been deleted. 
*/ + ctx_to->state.tfb = NULL; + if (!ctx_to->vertex) ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); if (!ctx_to->idxbuf.buffer) @@ -645,7 +648,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, - { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h index 8bc33c6a0e0..f9680f5a90f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -23,7 +23,7 @@ struct nvc0_blend_stateobj { struct nvc0_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[44]; + uint32_t state[42]; }; struct nvc0_zsa_stateobj { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index dbdf292c862..be123349148 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -225,10 +225,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe, nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; if (m2mf) { + struct nv50_miptree *src_mt = nv50_miptree(src); + struct nv50_miptree *dst_mt = nv50_miptree(dst); struct nv50_m2mf_rect drect, srect; unsigned i; - unsigned nx = util_format_get_nblocksx(src->format, src_box->width); - unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + unsigned nx = util_format_get_nblocksx(src->format, src_box->width) + << src_mt->ms_x; + unsigned ny = util_format_get_nblocksy(src->format, src_box->height) + << src_mt->ms_y; nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, 
dsty, dstz); nv50_m2mf_rect_setup(&srect, src, src_level, @@ -237,15 +241,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe, for (i = 0; i < src_box->depth; ++i) { nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); - if (nv50_miptree(dst)->layout_3d) + if (dst_mt->layout_3d) drect.z++; else - drect.base += nv50_miptree(dst)->layer_stride; + drect.base += dst_mt->layer_stride; - if (nv50_miptree(src)->layout_3d) + if (src_mt->layout_3d) srect.z++; else - srect.base += nv50_miptree(src)->layer_stride; + srect.base += src_mt->layer_stride; } return; } @@ -493,57 +497,57 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct nvc0_context *nvc0 = nvc0_context(pipe); - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nv50_miptree *mt = nv50_miptree(dst->texture); - struct nv50_surface *sf = nv50_surface(dst); - uint32_t mode = 0; - int unk = mt->base.base.target == PIPE_TEXTURE_2D; - unsigned z; - - if (!PUSH_SPACE(push, 32 + sf->depth)) - return; - - PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); - - if (clear_flags & PIPE_CLEAR_DEPTH) { - BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); - PUSH_DATAf(push, depth); - mode |= NVC0_3D_CLEAR_BUFFERS_Z; - } - - if (clear_flags & PIPE_CLEAR_STENCIL) { - BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); - PUSH_DATA (push, stencil & 0xff); - mode |= NVC0_3D_CLEAR_BUFFERS_S; - } - - BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); - PUSH_DATA (push, ( width << 16) | dstx); - PUSH_DATA (push, (height << 16) | dsty); - - BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); - PUSH_DATAh(push, mt->base.address + sf->offset); - PUSH_DATA (push, mt->base.address + sf->offset); - PUSH_DATA (push, nvc0_format_table[dst->format].rt); - PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); - PUSH_DATA (push, mt->layer_stride >> 2); - BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); - PUSH_DATA (push, 1); - BEGIN_NVC0(push, 
NVC0_3D(ZETA_HORIZ), 3); - PUSH_DATA (push, sf->width); - PUSH_DATA (push, sf->height); - PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); - BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); - PUSH_DATA (push, dst->u.tex.first_layer); - - BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); - for (z = 0; z < sf->depth; ++z) { - PUSH_DATA (push, mode | - (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); - } - - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + uint32_t mode = 0; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) + return; + + PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, dst->u.tex.first_layer); + + 
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 8b23a4887da..9c19ba20a7e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -27,6 +27,7 @@ struct push_context { struct { bool enabled; bool value; + uint8_t width; unsigned stride; const uint8_t *data; } edgeflag; @@ -53,6 +54,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx) /* silence warnings */ ctx->edgeflag.data = NULL; ctx->edgeflag.stride = 0; + ctx->edgeflag.width = 0; } static inline void @@ -100,6 +102,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, struct nv04_resource *buf = nv04_resource(vb->buffer); ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.width = util_format_get_blocksize(ve->src_format); if (buf) { unsigned offset = vb->buffer_offset + ve->src_offset; ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, @@ -137,10 +140,17 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) } static inline bool -ef_value(const struct push_context *ctx, uint32_t index) +ef_value_8(const struct push_context *ctx, uint32_t index) { - float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; - return *pf ? 
true : false; + uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return !!*pf; +} + +static inline bool +ef_value_32(const struct push_context *ctx, uint32_t index) +{ + uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return !!*pf; } static inline bool @@ -154,7 +164,11 @@ static inline unsigned ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -162,7 +176,11 @@ static inline unsigned ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -170,7 +188,11 @@ static inline unsigned ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -178,7 +200,11 @@ static inline unsigned ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i); return i; } diff 
--git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c index 28bcb629e43..91543782dfc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c @@ -27,33 +27,33 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, struct nouveau_bo *inter_bo, unsigned slice_size) { - unsigned i, idx = comm->pvp_cur_index & 0xf; - debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); #if 0 - debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); - debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); - for (i = 0; i != comm->irq_index; ++i) - debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); - for (i = 0; i != comm->parse_endpos_index; ++i) - debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); #endif - debug_printf("mb_y = %u\n", comm->mb_y[idx]); - if (comm->status_vp[idx] <= 1) - return; - - if ((comm->pvp_stage & 0xff) != 0xff) { - unsigned *map; - int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); - assert(ret >= 0); - map = inter_bo->map; - for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { - debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], 
map[i/4+2], map[i/4+3]); - } - munmap(inter_bo->map, inter_bo->size); - inter_bo->map = NULL; - } - assert((comm->pvp_stage & 0xff) == 0xff); + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] <= 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); + assert(ret >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); } #endif diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a576abdfaf2..d5981248a86 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -198,6 +198,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* SWTCL-only features. 
*/ diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index bc6980660a5..ee7beee3001 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -635,7 +635,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, return 0; } -void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs) { switch(value) { case 0: @@ -655,11 +655,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne break; case 0xBF800000: /* -1.0f */ *sel = V_SQ_ALU_SRC_1; - *neg ^= 1; + *neg ^= !abs; break; case 0xBF000000: /* -0.5f */ *sel = V_SQ_ALU_SRC_0_5; - *neg ^= 1; + *neg ^= !abs; break; default: *sel = V_SQ_ALU_SRC_LITERAL; @@ -1208,7 +1208,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) r600_bytecode_special_constants(nalu->src[i].value, - &nalu->src[i].sel, &nalu->src[i].neg); + &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs); } if (nalu->dst.sel >= bc->ngpr) { bc->ngpr = nalu->dst.sel + 1; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 7cf3a090908..d48ad1ebf01 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -255,7 +255,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, unsigned type); void r600_bytecode_special_constants(uint32_t value, - unsigned *sel, unsigned *neg); + unsigned *sel, unsigned *neg, unsigned abs); void r600_bytecode_disasm(struct r600_bytecode *bc); void r600_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9a97de9965e..9f4cda2c142 
100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -344,6 +344,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8efe902a329..fc6335ae8bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -162,10 +162,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, goto error; } - /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */ - if (rctx->b.chip_class <= R700) { - use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY); - } /* disable SB for shaders using doubles */ use_sb &= !shader->shader.uses_doubles; @@ -1008,7 +1004,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index f341ecb41a5..0dc6c918331 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -443,6 +443,27 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, return &rbuffer->b.b; } +struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned bind, + unsigned usage, + unsigned size, + unsigned alignment) +{ + struct pipe_resource 
buffer; + + memset(&buffer, 0, sizeof buffer); + buffer.target = PIPE_BUFFER; + buffer.format = PIPE_FORMAT_R8_UNORM; + buffer.bind = bind; + buffer.usage = usage; + buffer.flags = 0; + buffer.width0 = size; + buffer.height0 = 1; + buffer.depth0 = 1; + buffer.array_size = 1; + return r600_buffer_create(screen, &buffer, alignment); +} + struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 7ac94caad9f..0ad36849645 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -360,6 +360,8 @@ static const struct debug_named_value common_debug_options[] = { { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, + { "nodcc", DBG_NO_DCC, "Disable DCC." }, + { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." 
}, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -416,6 +418,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen) case CHIP_ICELAND: return "AMD ICELAND"; case CHIP_CARRIZO: return "AMD CARRIZO"; case CHIP_FIJI: return "AMD FIJI"; + case CHIP_STONEY: return "AMD STONEY"; default: return "AMD unknown"; } } @@ -540,6 +543,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) case CHIP_ICELAND: return "iceland"; case CHIP_CARRIZO: return "carrizo"; case CHIP_FIJI: return "fiji"; +#if HAVE_LLVM <= 0x0307 + case CHIP_STONEY: return "carrizo"; +#else + case CHIP_STONEY: return "stoney"; +#endif default: return ""; } } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index b58b500bd76..c300c0b3332 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -99,6 +99,8 @@ #define DBG_INFO (1llu << 40) #define DBG_NO_WC (1llu << 41) #define DBG_CHECK_VM (1llu << 42) +#define DBG_NO_DCC (1llu << 43) +#define DBG_NO_DCC_CLEAR (1llu << 44) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -214,6 +216,7 @@ struct r600_texture { struct r600_fmask_info fmask; struct r600_cmask_info cmask; struct r600_resource *cmask_buffer; + struct r600_resource *dcc_buffer; unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; @@ -243,6 +246,7 @@ struct r600_surface { unsigned cb_color_dim; /* EG only */ unsigned cb_color_pitch; /* EG and later */ unsigned cb_color_slice; /* EG and later */ + unsigned cb_dcc_base; /* VI and later */ unsigned cb_color_attrib; /* EG and later */ unsigned cb_dcc_control; /* VI and later */ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ @@ -489,6 +493,11 @@ bool r600_init_resource(struct r600_common_screen *rscreen, struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); +struct 
pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned bind, + unsigned usage, + unsigned size, + unsigned alignment); struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index fc69f48bb70..edfdfe33187 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -268,6 +268,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, if (rtex->cmask_buffer != &rtex->resource) { pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL); } + pipe_resource_reference((struct pipe_resource**)&rtex->dcc_buffer, NULL); pb_reference(&resource->buf, NULL); FREE(rtex); } @@ -482,6 +483,25 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); } +static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + if (rscreen->debug_flags & DBG_NO_DCC) + return; + + rtex->dcc_buffer = (struct r600_resource *) + r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment); + if (rtex->dcc_buffer == NULL) { + return; + } + + r600_screen_clear_buffer(rscreen, &rtex->dcc_buffer->b.b, 0, rtex->surface.dcc_size, + 0xFFFFFFFF, true); + + rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1); +} + static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, struct r600_texture *rtex) { @@ -621,6 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } } + if (rtex->surface.dcc_size) + vi_texture_alloc_dcc_separate(rscreen, rtex); } /* Now create the backing buffer. 
*/ @@ -1219,6 +1241,81 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); } +static void vi_get_fast_clear_parameters(enum pipe_format surface_format, + const union pipe_color_union *color, + uint32_t* reset_value, + bool* clear_words_needed) +{ + bool values[4] = {}; + int i; + bool main_value = false; + bool extra_value = false; + int extra_channel; + const struct util_format_description *desc = util_format_description(surface_format); + + *clear_words_needed = true; + *reset_value = 0x20202020U; + + /* If we want to clear without needing a fast clear eliminate step, we + * can set each channel to 0 or 1 (or 0/max for integer formats). We + * have two sets of flags, one for the last or first channel(extra) and + * one for the other channels(main). + */ + + if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || + surface_format == PIPE_FORMAT_B5G6R5_UNORM || + surface_format == PIPE_FORMAT_B5G6R5_SRGB) { + extra_channel = -1; + } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { + if(r600_translate_colorswap(surface_format) <= 1) + extra_channel = desc->nr_channels - 1; + else + extra_channel = 0; + } else + return; + + for (i = 0; i < 4; ++i) { + int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X; + + if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || + desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W) + continue; + + if (util_format_is_pure_sint(surface_format)) { + values[i] = color->i[i] != 0; + if (color->i[i] != 0 && color->i[i] != INT32_MAX) + return; + } else if (util_format_is_pure_uint(surface_format)) { + values[i] = color->ui[i] != 0U; + if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX) + return; + } else { + values[i] = color->f[i] != 0.0F; + if (color->f[i] != 0.0F && color->f[i] != 1.0F) + return; + } + + if (index == extra_channel) + extra_value = values[i]; + else + main_value = values[i]; + } + + for (int i = 0; i < 4; ++i) + if (values[i] != main_value && + desc->swizzle[i] 
- UTIL_FORMAT_SWIZZLE_X != extra_channel && + desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W) + return; + + *clear_words_needed = false; + if (main_value) + *reset_value |= 0x80808080U; + + if (extra_value) + *reset_value |= 0x40404040U; +} + void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, @@ -1272,18 +1369,36 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } - /* ensure CMASK is enabled */ - r600_texture_alloc_cmask_separate(rctx->screen, tex); - if (tex->cmask.size == 0) { - continue; + if (tex->dcc_buffer) { + uint32_t reset_value; + bool clear_words_needed; + + if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR) + continue; + + vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed); + + rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b, + 0, tex->surface.dcc_size, reset_value, true); + + if (clear_words_needed) + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + } else { + /* ensure CMASK is enabled */ + r600_texture_alloc_cmask_separate(rctx->screen, tex); + if (tex->cmask.size == 0) { + continue; + } + + /* Do the fast clear. */ + rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, + tex->cmask.offset, tex->cmask.size, 0, true); + + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; } - /* Do the fast clear. 
*/ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); - rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0, true); - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; if (dirty_cbufs) *dirty_cbufs |= 1 << i; rctx->set_atom_dirty(rctx, fb_state, true); diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h index 115042d153e..a3d182cd30f 100644 --- a/src/gallium/drivers/radeon/r600d_common.h +++ b/src/gallium/drivers/radeon/r600d_common.h @@ -202,6 +202,7 @@ #define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17) #define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13) +#define VI_S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28) /*CIK+*/ #define R_0300FC_CP_STRMOUT_CNTL 0x0300FC diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index c3ac7e7f2ef..33b01361aa5 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -478,6 +478,8 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; result.chroma_format = pic->pps->sps->chroma_format_idc; result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; @@ -586,6 +588,11 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + for (i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + /* TODO result.highestTid; result.isNonRef; diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h index 
452fbd60880..9cc0a694c30 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.h +++ b/src/gallium/drivers/radeon/radeon_uvd.h @@ -233,6 +233,15 @@ struct ruvd_h265 { uint8_t highestTid; uint8_t isNonRef; + + uint8_t p010_mode; + uint8_t msb_mode; + uint8_t luma_10to8; + uint8_t chroma_10to8; + uint8_t sclr_luma10to8; + uint8_t sclr_chroma10to8; + + uint8_t direct_reflist[2][15]; }; struct ruvd_vc1 { diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 3a1834b948f..32bfc32073b 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -205,11 +205,12 @@ int rvid_get_video_param(struct pipe_screen *screen, enum pipe_video_cap param) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + enum pipe_video_format codec = u_reduce_video_profile(profile); if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { switch (param) { case PIPE_VIDEO_CAP_SUPPORTED: - return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC && + return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && rvce_is_fw_version_supported(rscreen); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; @@ -232,38 +233,19 @@ int rvid_get_video_param(struct pipe_screen *screen, } } - /* UVD 2.x limits */ - if (rscreen->family < CHIP_PALM) { - enum pipe_video_format codec = u_reduce_video_profile(profile); - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - /* no support for MPEG4 */ - return codec != PIPE_VIDEO_FORMAT_MPEG4 && - /* FIXME: VC-1 simple/main profile is broken */ - profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE && - profile != PIPE_VIDEO_PROFILE_VC1_MAIN; - case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - /* MPEG2 only with shaders and no support for - interlacing on R6xx style UVD */ - return codec != PIPE_VIDEO_FORMAT_MPEG12 && - rscreen->family > CHIP_RV770; - default: - break; - } - } - switch (param) { case PIPE_VIDEO_CAP_SUPPORTED: - switch 
(u_reduce_video_profile(profile)) { + switch (codec) { case PIPE_VIDEO_FORMAT_MPEG12: case PIPE_VIDEO_FORMAT_MPEG4: case PIPE_VIDEO_FORMAT_MPEG4_AVC: - return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE; + if (rscreen->family < CHIP_PALM) + /* no support for MPEG4 */ + return codec != PIPE_VIDEO_FORMAT_MPEG4; + return true; case PIPE_VIDEO_FORMAT_VC1: /* FIXME: VC-1 simple/main profile is broken */ - return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED && - entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE; + return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED; case PIPE_VIDEO_FORMAT_HEVC: /* Carrizo only supports HEVC Main */ return rscreen->family >= CHIP_CARRIZO && @@ -280,13 +262,17 @@ int rvid_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_PREFERED_FORMAT: return PIPE_FORMAT_NV12; case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) - return false; //The hardware doesn't support interlaced HEVC. - return true; case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) - return false; //The hardware doesn't support interlaced HEVC. - return true; + if (rscreen->family < CHIP_PALM) { + /* MPEG2 only with shaders and no support for + interlacing on R6xx style UVD */ + return codec != PIPE_VIDEO_FORMAT_MPEG12 && + rscreen->family > CHIP_RV770; + } else { + if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) + return false; //The firmware doesn't support interlaced HEVC. 
+ return true; + } case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: return true; case PIPE_VIDEO_CAP_MAX_LEVEL: diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index b91e1adf41d..8bf1e15f3be 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -137,6 +137,7 @@ enum radeon_family { CHIP_ICELAND, CHIP_CARRIZO, CHIP_FIJI, + CHIP_STONEY, CHIP_LAST, }; @@ -331,6 +332,7 @@ struct radeon_surf_level { uint32_t nblk_z; uint32_t pitch_bytes; uint32_t mode; + uint64_t dcc_offset; }; struct radeon_surf { @@ -366,6 +368,9 @@ struct radeon_surf { uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL]; uint32_t pipe_config; uint32_t num_banks; + + uint64_t dcc_size; + uint64_t dcc_alignment; }; struct radeon_bo_list_item { diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 6454b8ce8c0..e53af1dd6b5 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx, if (src->format != dst->format || rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || - (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) { + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 082ea850675..fce014a1e6b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size); + assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer); si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, @@ -455,7 
+455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx, si_blit_decompress_depth_in_place(sctx, rtex, true, level, level, first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size) { + } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) { si_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } @@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx, util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); util_blitter_default_src_texture(&src_templ, src, src_level); - if (util_format_is_compressed(src->format) && + if (util_format_is_compressed(src->format) || util_format_is_compressed(dst->format)) { unsigned blocksize = util_format_get_blocksize(src->format); @@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx, src_force_level = src_level; } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) || /* also *8_SNORM has precision issues, use UNORM instead */ - util_format_is_snorm(src->format)) { + util_format_is_snorm8(src->format)) { if (util_format_is_subsampled_422(src->format)) { src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; @@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, info->src.box.depth == 1 && dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && !(dst->surface.flags & RADEON_SURF_SCANOUT) && - (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) { + (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */ + !dst->dcc_buffer) { si_blitter_begin(ctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 
0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 13738da5e2c..a8ff6f27319 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, rview->resource, RADEON_USAGE_READ, r600_get_sampler_view_priority(rview->resource)); + if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + rview->dcc_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DCC); + pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); views->desc.enabled_mask |= 1llu << slot; @@ -229,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->depth_texture_mask &= ~(1 << slot); } - if (rtex->cmask.size || rtex->fmask.size) { + if (rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_buffer && rtex->dirty_level_mask)) { samplers->compressed_colortex_mask |= 1 << slot; } else { samplers->compressed_colortex_mask &= ~(1 << slot); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 31b0b41e5a4..581e89f42d8 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx, if (src->format != dst->format || src_box->depth > 1 || (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || rdst->cmask.size || rdst->fmask.size || - rsrc->cmask.size || rsrc->fmask.size) { + rsrc->cmask.size || rsrc->fmask.size || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5f910c95ef3..60baad3d13c 100644 --- 
a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->pstipple_sampler_state) sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); - if (sctx->dummy_pixel_shader) - sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); if (sctx->fixed_func_tcs_shader.cso) sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) @@ -300,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -578,6 +577,33 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) return true; } +static bool si_init_gs_info(struct si_screen *sscreen) +{ + switch (sscreen->b.family) { + case CHIP_OLAND: + case CHIP_HAINAN: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + case CHIP_ICELAND: + case CHIP_CARRIZO: + case CHIP_STONEY: + sscreen->gs_table_depth = 16; + return true; + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_TONGA: + case CHIP_FIJI: + sscreen->gs_table_depth = 32; + return true; + default: + return false; + } +} + struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); @@ -595,7 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_initialize_pipe_config(sscreen)) { + !si_initialize_pipe_config(sscreen) || + !si_init_gs_info(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 
d7a2282952a..42cd8803c36 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -42,6 +42,7 @@ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 +#define SI_GS_PER_ES 128 /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) @@ -85,6 +86,7 @@ struct si_compute; struct si_screen { struct r600_common_screen b; + unsigned gs_table_depth; }; struct si_blend_color { @@ -96,6 +98,7 @@ struct si_sampler_view { struct pipe_sampler_view base; struct list_head list; struct r600_resource *resource; + struct r600_resource *dcc_buffer; /* [0..7] = image descriptor * [4..7] = buffer descriptor */ uint32_t state[8]; @@ -203,9 +206,6 @@ struct si_context { struct si_pm4_state *init_config; bool init_config_has_vgt_flush; struct si_pm4_state *vgt_shader_config[4]; - /* With rasterizer discard, there doesn't have to be a pixel shader. - * In that case, we bind this one: */ - void *dummy_pixel_shader; /* shaders */ struct si_shader_ctx_state ps_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 243bdc6e6d7..18b64056bc7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at * Reproducible with Unigine Heaven 4.0 and drirc missing. */ if (blend->dual_src_blend && + sctx->ps_shader.cso && (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3) mask = 0; @@ -697,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + rs->rasterizer_discard = state->rasterizer_discard; rs->pa_sc_line_stipple = state->line_stipple_enable ? 
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; @@ -1924,8 +1926,21 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_color_info = color_info; surf->cb_color_attrib = color_attrib; - if (sctx->b.chip_class >= VI) - surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); + if (sctx->b.chip_class >= VI && rtex->dcc_buffer) { + unsigned max_uncompressed_block_size = 2; + uint64_t dcc_offset = rtex->surface.level[level].dcc_offset; + + if (rtex->surface.nsamples > 1) { + if (rtex->surface.bpe == 1) + max_uncompressed_block_size = 0; + else if (rtex->surface.bpe == 2) + max_uncompressed_block_size = 1; + } + + surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | + S_028C78_INDEPENDENT_64B_BLOCKS(1); + surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8; + } if (rtex->fmask.size) { surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; @@ -2249,6 +2264,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom RADEON_PRIO_CMASK); } + if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + tex->dcc_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_DCC); + } + radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, sctx->b.chip_class >= VI ? 
14 : 13); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -2266,7 +2287,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ if (sctx->b.chip_class >= VI) - radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ + radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0] && @@ -2633,8 +2654,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx, view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | S_008F24_LAST_ARRAY(last_layer)); - view->state[6] = 0; - view->state[7] = 0; + + if (tmp->dcc_buffer) { + uint64_t dcc_offset = surflevel[base_level].dcc_offset; + unsigned swap = r600_translate_colorswap(pipe_format); + + view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1); + view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8; + view->dcc_buffer = tmp->dcc_buffer; + } else { + view->state[6] = 0; + view->state[7] = 0; + } /* Initialize the sampler view for FMASK. */ if (tmp->fmask.size) { @@ -3262,7 +3293,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? 
*/ - si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); + si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); @@ -3336,6 +3367,7 @@ static void si_init_config(struct si_context *sctx) break; case CHIP_KABINI: case CHIP_MULLINS: + case CHIP_STONEY: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; @@ -3406,7 +3438,8 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.chip_class >= VI) { si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | + S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fba6619d2fd..8b9a311cd3f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -61,6 +61,7 @@ struct si_state_rasterizer { bool poly_smooth; bool uses_poly_offset; bool clamp_fragment_color; + bool rasterizer_discard; }; struct si_dsa_stencil_ref_part { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index ce6c98c3124..cf0891a2ab7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned prim = info->mode; unsigned primgroup_size = 128; /* recommended without a GS */ + unsigned max_primgroup_in_wave = 2; /* SWITCH_ON_EOP(0) is always preferable. 
*/ bool wd_switch_on_eop = false; @@ -246,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* primgroup_size must be set to a multiple of NUM_PATCHES */ primgroup_size = (primgroup_size / num_patches) * num_patches; - /* SWITCH_ON_EOI must be set if PrimID is used. - * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + /* SWITCH_ON_EOI must be set if PrimID is used. */ if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || - sctx->tes_shader.cso->info.uses_primid) { + sctx->tes_shader.cso->info.uses_primid) ia_switch_on_eoi = true; - partial_es_wave = true; - } /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ if ((sctx->b.family == CHIP_TAHITI || @@ -269,10 +267,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, wd_switch_on_eop = true; } - if (sctx->b.streamout.streamout_enabled || - sctx->b.streamout.prims_gen_query_enabled) - partial_vs_wave = true; - if (sctx->b.chip_class >= CIK) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. @@ -282,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, prim == PIPE_PRIM_LINE_LOOP || prim == PIPE_PRIM_TRIANGLE_FAN || prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || - info->primitive_restart) + info->primitive_restart || + info->count_from_stream_output) wd_switch_on_eop = true; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -292,14 +287,34 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, (info->indirect || info->instance_count > 1)) wd_switch_on_eop = true; - /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */ - if (info->count_from_stream_output) - wd_switch_on_eop = true; + /* Required on CIK and later. */ + if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop) + ia_switch_on_eoi = true; + + /* Required by Hawaii and, for some special cases, by VI. 
*/ + if (ia_switch_on_eoi && + (sctx->b.family == CHIP_HAWAII || + (sctx->b.chip_class == VI && + (sctx->gs_shader.cso || max_primgroup_in_wave != 2)))) + partial_vs_wave = true; + + /* Instancing bug on Bonaire. */ + if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi && + (info->indirect || info->instance_count > 1)) + partial_vs_wave = true; /* If the WD switch is false, the IA switch must be false too. */ assert(wd_switch_on_eop || !ia_switch_on_eop); } + /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + if (ia_switch_on_eoi) + partial_es_wave = true; + + /* GS requirement. */ + if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) + partial_es_wave = true; + /* Hw bug with single-primitive instances and SWITCH_ON_EOI * on multi-SE chips. */ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi && @@ -308,18 +323,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, u_prims_for_vertices(info->mode, info->count) <= 1))) sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; - /* Instancing bug on 2 SE chips. */ - if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi && - (info->indirect || info->instance_count > 1)) - partial_vs_wave = true; - return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | - S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0); + S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 
+ max_primgroup_in_wave : 0); } static unsigned si_get_ls_hs_config(struct si_context *sctx, @@ -636,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom) S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); + + /* Necessary for DCC */ + if (sctx->chip_class >= VI) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | + EVENT_INDEX(5)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + } } if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | @@ -728,6 +750,7 @@ static void si_get_draw_start_count(struct si_context *sctx, void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_index_buffer ib = {}; unsigned mask; @@ -735,7 +758,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) (info->indexed || !info->count_from_stream_output)) return; - if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) { + if (!sctx->vs_shader.cso) { + assert(0); + return; + } + if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) { assert(0); return; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index eea00e0fafc..4a3a04caa52 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - if (sctx->vs_shader.cso == sel || !sel) + if (sctx->vs_shader.cso == sel) return; sctx->vs_shader.cso = sel; - sctx->vs_shader.current = sel->first_variant; + sctx->vs_shader.current = sel ? 
sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); si_update_viewports_and_scissors(sctx); } @@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_viewports_and_scissors(sctx); } -static void si_make_dummy_ps(struct si_context *sctx) -{ - if (!sctx->dummy_pixel_shader) { - sctx->dummy_pixel_shader = - util_make_fragment_cloneinput_shader(&sctx->b.b, 0, - TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT); - } -} - static void si_bind_ps_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) if (sctx->ps_shader.cso == sel) return; - /* use a dummy shader if binding a NULL shader */ - if (!sel) { - si_make_dummy_ps(sctx); - sel = sctx->dummy_pixel_shader; - } - sctx->ps_shader.cso = sel; - sctx->ps_shader.current = sel->first_variant; + sctx->ps_shader.current = sel ? sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } @@ -956,13 +940,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); - struct tgsi_shader_info *psinfo = &ps->selector->info; + struct tgsi_shader_info *psinfo; struct tgsi_shader_info *vsinfo = &vs->selector->info; unsigned i, j, tmp, num_written = 0; - if (!ps->nparam) + if (!ps || !ps->nparam) return; + psinfo = &ps->selector->info; + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam); for (i = 0; i < psinfo->num_inputs; i++) { @@ -1025,7 +1011,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; - unsigned input_ena = ps->spi_ps_input_ena; + unsigned input_ena; + + if (!ps) + return; + + 
input_ena = ps->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || @@ -1531,23 +1522,38 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); - r = si_shader_select(ctx, &sctx->ps_shader); - if (r) - return false; - si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); - - if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || - sctx->sprite_coord_enable != rs->sprite_coord_enable || - sctx->flatshade != rs->flatshade) { - sctx->sprite_coord_enable = rs->sprite_coord_enable; - sctx->flatshade = rs->flatshade; - si_mark_atom_dirty(sctx, &sctx->spi_map); - } + if (sctx->ps_shader.cso) { + r = si_shader_select(ctx, &sctx->ps_shader); + if (r) + return false; + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); + + if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || + sctx->sprite_coord_enable != rs->sprite_coord_enable || + sctx->flatshade != rs->flatshade) { + sctx->sprite_coord_enable = rs->sprite_coord_enable; + sctx->flatshade = rs->flatshade; + si_mark_atom_dirty(sctx, &sctx->spi_map); + } + + if (si_pm4_state_changed(sctx, ps) || + sctx->force_persample_interp != rs->force_persample_interp) { + sctx->force_persample_interp = rs->force_persample_interp; + si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + } + + if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { + sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } + + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; + si_mark_atom_dirty(sctx, &sctx->msaa_config); - if (si_pm4_state_changed(sctx, ps) || - sctx->force_persample_interp != rs->force_persample_interp) { - sctx->force_persample_interp = rs->force_persample_interp; - 
si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + if (sctx->b.chip_class == SI) + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } } if (si_pm4_state_changed(sctx, ls) || @@ -1559,19 +1565,6 @@ bool si_update_shaders(struct si_context *sctx) if (!si_update_spi_tmpring_size(sctx)) return false; } - - if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { - sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } - - if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; - si_mark_atom_dirty(sctx, &sctx->msaa_config); - - if (sctx->b.chip_class == SI) - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } return true; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index e7006d2fa0d..c0fc82b2f2c 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -249,6 +249,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index d7a3360713f..23ec4ef3cb6 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -214,10 +214,10 @@ prepare_shader_sampling( row_stride[j] = sp_tex->stride[j]; img_stride[j] = sp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == 
PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8a0935062b6..e3e28a3ef32 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1033,6 +1033,7 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; /* Can we fetch all four at once: */ @@ -1081,6 +1082,7 @@ img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; out = get_texel_2d_no_border(sp_sview, addr, x0, y0); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1111,6 +1113,7 @@ img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; x0 = util_ifloor(u); if (x0 < 0) @@ -1154,7 +1157,8 @@ img_filter_1d_nearest(const struct sp_sampler_view *sp_sview, sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); - out = get_texel_2d(sp_sview, sp_samp, addr, x, 0); + out = get_texel_1d_array(sp_sview, sp_samp, addr, x, + sp_sview->base.u.tex.first_layer); for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = out[c]; @@ -1215,6 +1219,7 @@ img_filter_2d_nearest(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); @@ -1396,8 +1401,10 @@ img_filter_1d_linear(const struct sp_sampler_view 
*sp_sview, sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); - tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0); - tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0); + tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, + sp_sview->base.u.tex.first_layer); + tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, + sp_sview->base.u.tex.first_layer); /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1523,6 +1530,7 @@ img_filter_2d_linear(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); @@ -3252,10 +3260,22 @@ sp_get_texels(const struct sp_sampler_view *sp_sview, switch (sp_sview->base.target) { case PIPE_BUFFER: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + const int x = CLAMP(v_i[j] + offset[0] + + sp_sview->base.u.buf.first_element, + sp_sview->base.u.buf.first_element, + sp_sview->base.u.buf.last_element); + tx = get_texel_2d_no_border(sp_sview, addr, x, 0); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; case PIPE_TEXTURE_1D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { const int x = CLAMP(v_i[j] + offset[0], 0, width - 1); - tx = get_texel_2d_no_border(sp_sview, addr, x, 0); + tx = get_texel_2d_no_border(sp_sview, addr, x, + sp_sview->base.u.tex.first_layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -3277,7 +3297,8 @@ sp_get_texels(const struct sp_sampler_view *sp_sview, for (j = 0; j < TGSI_QUAD_SIZE; j++) { const int x = CLAMP(v_i[j] + offset[0], 0, width - 1); const int y = CLAMP(v_j[j] + offset[1], 0, height - 1); - tx = get_texel_2d_no_border(sp_sview, addr, x, y); + tx = get_texel_3d_no_border(sp_sview, addr, x, y, + sp_sview->base.u.tex.first_layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -3307,6 +3328,7 @@ sp_get_texels(const struct 
sp_sampler_view *sp_sview, } break; case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */ + case PIPE_TEXTURE_CUBE_ARRAY: default: assert(!"Unknown or CUBE texture type in TXF processing\n"); break; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index e1ea5df24ca..3347f5f1883 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -127,7 +127,8 @@ softpipe_can_create_resource(struct pipe_screen *screen, */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, - struct softpipe_resource *spr) + struct softpipe_resource *spr, + const void *map_front_private) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; @@ -139,6 +140,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spr->base.width0, spr->base.height0, 64, + map_front_private, &spr->stride[0] ); return spr->dt != NULL; @@ -149,8 +151,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, * Create new pipe_resource given the template information. 
*/ static struct pipe_resource * -softpipe_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templat) +softpipe_resource_create_front(struct pipe_screen *screen, + const struct pipe_resource *templat, + const void *map_front_private) { struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource); if (!spr) @@ -169,7 +172,7 @@ softpipe_resource_create(struct pipe_screen *screen, if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { - if (!softpipe_displaytarget_layout(screen, spr)) + if (!softpipe_displaytarget_layout(screen, spr, map_front_private)) goto fail; } else { @@ -184,6 +187,12 @@ softpipe_resource_create(struct pipe_screen *screen, return NULL; } +static struct pipe_resource * +softpipe_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templat) +{ + return softpipe_resource_create_front(screen, templat, NULL); +} static void softpipe_resource_destroy(struct pipe_screen *pscreen, @@ -514,6 +523,7 @@ void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->resource_create = softpipe_resource_create; + screen->resource_create_front = softpipe_resource_create_front; screen->resource_destroy = softpipe_resource_destroy; screen->resource_from_handle = softpipe_resource_from_handle; screen->resource_get_handle = softpipe_resource_get_handle; diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index d3cf52f08e2..0e1e332d6cb 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -1016,6 +1016,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, *decls = declArray; *ranges = rangeArray; + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; + return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c index 596ba953cd2..5c121089f91 100644 --- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c +++ 
b/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -535,6 +535,7 @@ SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -550,6 +551,7 @@ SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation, baseVertexLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -566,6 +568,7 @@ SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount, startVertexLocation, startInstanceLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -584,6 +587,8 @@ SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, startIndexLocation, baseVertexLocation, startInstanceLocation); + + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -593,6 +598,7 @@ SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc) { SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 5635411d938..caf4b17de16 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -32,6 +32,7 @@ #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_context.h" +#include "svga_shader.h" #define DBG 0 @@ -206,6 +207,32 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, unsigned gen_prim, gen_size, gen_nr, gen_type; u_generate_func gen_func; enum pipe_error ret = PIPE_OK; + unsigned api_pv = hwtnl->api_pv; + struct svga_context *svga = hwtnl->svga; + + if (svga->curr.rast->templ.flatshade && + svga->state.hw_draw.fs->constant_color_output) { + /* The fragment color is a constant, not per-vertex so the whole + * primitive 
will be the same color (except for possible blending). + * We can ignore the current provoking vertex state and use whatever + * the hardware wants. + */ + api_pv = hwtnl->hw_pv; + + if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) { + /* Do some simple primitive conversions to avoid index buffer + * generation below. Note that polygons and quads are not directly + * supported by the svga device. Also note, we can only do this + * for flat/constant-colored rendering because of provoking vertex. + */ + if (prim == PIPE_PRIM_POLYGON) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + else if (prim == PIPE_PRIM_QUADS && count == 4) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + } + } if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && prim >= PIPE_PRIM_TRIANGLES) { @@ -226,7 +253,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, prim, start, count, - hwtnl->api_pv, + api_pv, hwtnl->hw_pv, &gen_prim, &gen_size, &gen_nr, &gen_func); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index f6fafca5c0b..5aa7b0d86eb 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -382,6 +382,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h index efcac408626..f49fdb46d0e 100644 --- a/src/gallium/drivers/svga/svga_shader.h +++ b/src/gallium/drivers/svga/svga_shader.h @@ -155,6 +155,9 @@ struct svga_shader_variant * applied to any of the varyings. */ + /** Is the color output just a constant value? 
(fragment shader only) */ + boolean constant_color_output; + /** For FS-based polygon stipple */ unsigned pstipple_sampler_unit; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 202eee276b7..4c16f4313a0 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -240,6 +240,13 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga, variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit.constant_color_output && emit.num_output_writes == 1; + #if 0 if (!svga_shader_verify(variant->tokens, variant->nr_tokens) || SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 0b82483ab2e..83f0c8bd4d0 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -84,6 +84,9 @@ struct svga_shader_emitter int dynamic_branching_level; + unsigned num_output_writes; + boolean constant_color_output; + boolean in_main_func; boolean created_common_immediate; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 00c91a4fa61..dbb90f7654e 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -99,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; + emit->num_output_writes++; break; default: @@ -2103,6 +2104,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit, /** + * TGSI_OPCODE_MOVE is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. 
+ */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + +/** * Translate/emit TGSI DDX, DDY instructions. */ static boolean @@ -3045,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index d62f2bbcc96..e70ee689c59 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -202,6 +202,9 @@ struct svga_shader_emitter_v10 /* user clip plane constant slot indexes */ unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; + unsigned num_output_writes; + boolean constant_color_output; + boolean uses_flat_interp; /* For all shaders: const reg index for RECT coord scaling */ @@ -913,6 +916,8 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, */ assert(sem_name == TGSI_SEMANTIC_COLOR); index = emit->info.output_semantic_index[index]; + + emit->num_output_writes++; } } } @@ -3097,7 +3102,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) unsigned i; unsigned clip_plane_enable = emit->key.clip_plane_enable; unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; - unsigned num_written_clipdist = emit->info.num_written_clipdistance; + int num_written_clipdist = emit->info.num_written_clipdistance; 
assert(emit->clip_dist_out_index != INVALID_INDEX); assert(emit->clip_dist_tmp_index != INVALID_INDEX); @@ -3109,7 +3114,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) */ emit->clip_dist_tmp_index = INVALID_INDEX; - for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) { + for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); @@ -5573,6 +5578,29 @@ emit_simple(struct svga_shader_emitter_v10 *emit, /** + * We only special case the MOV instruction to try to detect constant + * color writes in the fragment shader. + */ +static boolean +emit_mov(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_full_src_register *src = &inst->Src[0]; + const struct tgsi_full_dst_register *dst = &inst->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple(emit, inst); +} + + +/** * Emit a simple VGPU10 instruction which writes to multiple dest registers, * where TGSI only uses one dest register. 
*/ @@ -5652,7 +5680,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_MAD: case TGSI_OPCODE_MAX: case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MOV: case TGSI_OPCODE_MUL: case TGSI_OPCODE_NOP: case TGSI_OPCODE_NOT: @@ -5677,7 +5704,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, /* simple instructions */ return emit_simple(emit, inst); - + case TGSI_OPCODE_MOV: + return emit_mov(emit, inst); case TGSI_OPCODE_EMIT: return emit_vertex(emit, inst); case TGSI_OPCODE_ENDPRIM: @@ -6762,6 +6790,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit->constant_color_output && emit->num_output_writes == 1; + /** keep track in the variant if flat interpolation is used * for any of the varyings. */ diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index c750603989f..3129e46ed06 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -85,6 +85,8 @@ struct winsys_handle; #define SVGA_QUERY_FLAG_SET (1 << 0) #define SVGA_QUERY_FLAG_REF (1 << 1) +#define SVGA_HINT_FLAG_DRAW_EMITTED (1 << 0) + /** Opaque surface handle */ struct svga_winsys_surface; @@ -213,6 +215,11 @@ struct svga_winsys_context uint32 cid; /** + * Flags to hint the current context state + */ + uint32 hints; + + /** ** BEGIN new functions for guest-backed surfaces. 
**/ diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c index 6d748010baf..476d2b5b0b1 100644 --- a/src/gallium/drivers/vc4/vc4_cl_dump.c +++ b/src/gallium/drivers/vc4/vc4_cl_dump.c @@ -22,6 +22,7 @@ */ #include "util/u_math.h" +#include "util/u_prim.h" #include "util/macros.h" #include "vc4_context.h" @@ -163,6 +164,26 @@ dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_ } static void +dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset) +{ + uint8_t *b = cl + offset; + uint32_t *count = cl + offset + 1; + uint32_t *ib_offset = cl + offset + 5; + uint32_t *max_index = cl + offset + 9; + + fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s\n", + offset, hw_offset, + b[0], (b[0] & VC4_INDEX_BUFFER_U16) ? "16-bit" : "8-bit", + u_prim_name(b[0] & 0x7)); + fprintf(stderr, "0x%08x 0x%08x: %d verts\n", + offset + 1, hw_offset + 1, *count); + fprintf(stderr, "0x%08x 0x%08x: 0x%08x IB offset\n", + offset + 5, hw_offset + 5, *ib_offset); + fprintf(stderr, "0x%08x 0x%08x: 0x%08x max index\n", + offset + 9, hw_offset + 9, *max_index); +} + +static void dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset) { uint32_t *bits = cl + offset; @@ -262,14 +283,14 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h shorts[1]); const char *format = "???"; - switch ((bytes[0] >> 2) & 3) { - case 0: + switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_FORMAT)) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: format = "BGR565_DITHERED"; break; - case 1: + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: format = "RGBA8888"; break; - case 2: + case VC4_RENDER_CONFIG_FORMAT_BGR565: format = "BGR565"; break; } @@ -277,29 +298,31 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h format = "64bit"; const char *tiling = "???"; - switch ((bytes[0] >> 6) & 3) { - case 0: + switch (VC4_GET_FIELD(shorts[2], 
VC4_RENDER_CONFIG_MEMORY_FORMAT)) { + case VC4_TILING_FORMAT_LINEAR: tiling = "linear"; break; - case 1: + case VC4_TILING_FORMAT_T: tiling = "T"; break; - case 2: + case VC4_TILING_FORMAT_LT: tiling = "LT"; break; } - fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s\n", + fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s %s\n", offset + 8, hw_offset + 8, bytes[0], format, tiling, - (bytes[0] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss"); + (shorts[2] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss", + (shorts[2] & VC4_RENDER_CONFIG_DECIMATE_MODE_4X) ? + "ms_decimate" : "ss_decimate"); const char *earlyz = ""; - if (bytes[1] & (1 << 3)) { + if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) { earlyz = "early_z disabled"; } else { - if (bytes[1] & (1 << 2)) + if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G) earlyz = "early_z >"; else earlyz = "early_z <"; @@ -356,7 +379,7 @@ static const struct packet_info { PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL), PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL), - PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE), + PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE), PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE), PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE), diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index c7698422951..86f2ce5e608 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -250,10 +250,10 @@ struct vc4_context { bool needs_flush; /** - * Set when needs_flush, and the queued rendering is not just composed - * of full-buffer clears. + * Number of draw calls (not counting full buffer clears) queued in + * the current job. */ - bool draw_call_queued; + uint32_t draw_calls_queued; /** Maximum index buffer valid for the current shader_rec. 
*/ uint32_t max_index; @@ -291,7 +291,10 @@ struct vc4_context { struct vc4_vertex_stateobj *vtx; - struct pipe_blend_color blend_color; + struct { + struct pipe_blend_color f; + uint8_t ub[4]; + } blend_color; struct pipe_stencil_ref stencil_ref; unsigned sample_mask; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index a4e5e092b1a..624a236c573 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -25,6 +25,7 @@ #include "util/u_prim.h" #include "util/u_format.h" #include "util/u_pack_color.h" +#include "util/u_upload_mgr.h" #include "indices/u_primconvert.h" #include "vc4_context.h" @@ -100,7 +101,7 @@ vc4_start_draw(struct vc4_context *vc4) VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES)); vc4->needs_flush = true; - vc4->draw_call_queued = true; + vc4->draw_calls_queued++; vc4->draw_width = width; vc4->draw_height = height; @@ -226,6 +227,38 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i vc4->max_index = max_index; } +/** + * HW-2116 workaround: Flush the batch before triggering the hardware state + * counter wraparound behavior. + * + * State updates are tracked by a global counter which increments at the first + * state update after a draw or a START_BINNING. Tiles can then have their + * state updated at draw time with a set of cheap checks for whether the + * state's copy of the global counter matches the global counter the last time + * that state was written to the tile. + * + * The state counters are relatively small and wrap around quickly, so you + * could get false negatives for needing to update a particular state in the + * tile. To avoid this, the hardware attempts to write all of the state in + * the tile at wraparound time. This apparently is broken, so we just flush + * everything before that behavior is triggered. 
A batch flush is sufficient + * to get our current contents drawn and reset the counters to 0. + * + * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the + * tiles with VC4_PACKET_RETURN_FROM_LIST. + */ +static void +vc4_hw_2116_workaround(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (vc4->draw_calls_queued == 0x1ef0) { + perf_debug("Flushing batch due to HW-2116 workaround " + "(too many draw calls per scene\n"); + vc4_flush(pctx); + } +} + static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { @@ -244,6 +277,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_update_shadow_textures(pctx, &vc4->verttex); vc4_update_shadow_textures(pctx, &vc4->fragtex); + vc4_hw_2116_workaround(pctx); + vc4_get_draw_cl_space(vc4); if (vc4->prim_mode != info->mode) { @@ -285,7 +320,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) info->count, &offset); index_size = 2; } else { - prsc = vc4->indexbuf.buffer; + if (vc4->indexbuf.user_buffer) { + prsc = NULL; + u_upload_data(vc4->uploader, 0, + info->count * index_size, + vc4->indexbuf.user_buffer, + &offset, &prsc); + } else { + prsc = vc4->indexbuf.buffer; + } } struct vc4_resource *rsc = vc4_resource(prsc); @@ -300,7 +343,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); - if (vc4->indexbuf.index_size == 4) + if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer) pipe_resource_reference(&prsc, NULL); } else { cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); @@ -343,8 +386,8 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. 
*/ - if (vc4->draw_call_queued) { - perf_debug("Flushing rendering to process new clear."); + if (vc4->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); vc4_flush(pctx); } diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 7ebd9f160eb..9ad79c2ea10 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -55,7 +55,7 @@ vc4_job_reset(struct vc4_context *vc4) vc4->shader_rec_count = 0; vc4->needs_flush = false; - vc4->draw_call_queued = false; + vc4->draw_calls_queued = 0; /* We have no hardware context saved between our draw calls, so we * need to flag the next draw as needing all state emitted. Emitting diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 17b524653bb..373c9e12d11 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) } static nir_ssa_def * -vc4_blend_channel(nir_builder *b, - nir_ssa_def **src, - nir_ssa_def **dst, - unsigned factor, - int channel) +vc4_blend_channel_f(nir_builder *b, + nir_ssa_def **src, + nir_ssa_def **dst, + unsigned factor, + int channel) { switch(factor) { case PIPE_BLENDFACTOR_ONE: @@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b, } static nir_ssa_def * -vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, - unsigned func) +vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, + int chan) +{ + unsigned chan_mask = 0xff << (chan * 8); + return nir_ior(b, + nir_iand(b, src0, nir_imm_int(b, ~chan_mask)), + nir_iand(b, src1, nir_imm_int(b, chan_mask))); +} + +static nir_ssa_def * +vc4_blend_channel_i(nir_builder *b, + nir_ssa_def *src, + nir_ssa_def *dst, + nir_ssa_def *src_a, + nir_ssa_def *dst_a, + unsigned factor, + int a_chan) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return 
nir_imm_int(b, ~0); + case PIPE_BLENDFACTOR_SRC_COLOR: + return src; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return src_a; + case PIPE_BLENDFACTOR_DST_ALPHA: + return dst_a; + case PIPE_BLENDFACTOR_DST_COLOR: + return dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return vc4_nir_set_packed_chan(b, + nir_umin_4x8(b, + src_a, + nir_inot(b, dst_a)), + nir_imm_int(b, ~0), + a_chan); + case PIPE_BLENDFACTOR_CONST_COLOR: + return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA); + case PIPE_BLENDFACTOR_CONST_ALPHA: + return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA); + case PIPE_BLENDFACTOR_ZERO: + return nir_imm_int(b, 0); + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return nir_inot(b, src); + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return nir_inot(b, src_a); + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return nir_inot(b, dst_a); + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return nir_inot(b, dst); + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA)); + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA)); + + default: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* Unsupported. 
*/ + fprintf(stderr, "Unknown blend factor %d\n", factor); + return nir_imm_int(b, ~0); + } +} + +static nir_ssa_def * +vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, + unsigned func) { switch (func) { case PIPE_BLEND_ADD: @@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, } } +static nir_ssa_def * +vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, + unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return nir_usadd_4x8(b, src, dst); + case PIPE_BLEND_SUBTRACT: + return nir_ussub_4x8(b, src, dst); + case PIPE_BLEND_REVERSE_SUBTRACT: + return nir_ussub_4x8(b, dst, src); + case PIPE_BLEND_MIN: + return nir_umin_4x8(b, src, dst); + case PIPE_BLEND_MAX: + return nir_umax_4x8(b, src, dst); + + default: + /* Unsupported. */ + fprintf(stderr, "Unknown blend func %d\n", func); + return src; + + } +} + static void -vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, - nir_ssa_def **src_color, nir_ssa_def **dst_color) +vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, + nir_ssa_def **src_color, nir_ssa_def **dst_color) { struct pipe_rt_blend_state *blend = &c->fs_key->blend; @@ -192,20 +283,106 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, int dst_factor = ((i != 3) ? blend->rgb_dst_factor : blend->alpha_dst_factor); src_blend[i] = nir_fmul(b, src_color[i], - vc4_blend_channel(b, - src_color, dst_color, - src_factor, i)); + vc4_blend_channel_f(b, + src_color, dst_color, + src_factor, i)); dst_blend[i] = nir_fmul(b, dst_color[i], - vc4_blend_channel(b, - src_color, dst_color, - dst_factor, i)); + vc4_blend_channel_f(b, + src_color, dst_color, + dst_factor, i)); } for (int i = 0; i < 4; i++) { - result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i], - ((i != 3) ? blend->rgb_func : - blend->alpha_func)); + result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i], + ((i != 3) ? 
blend->rgb_func : + blend->alpha_func)); + } +} + +static nir_ssa_def * +vc4_nir_splat(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8))); + return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16))); +} + +static nir_ssa_def * +vc4_do_blending_i(struct vc4_compile *c, nir_builder *b, + nir_ssa_def *src_color, nir_ssa_def *dst_color, + nir_ssa_def *src_float_a) +{ + struct pipe_rt_blend_state *blend = &c->fs_key->blend; + + if (!blend->blend_enable) + return src_color; + + enum pipe_format color_format = c->fs_key->color_format; + const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff); + nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a); + nir_ssa_def *dst_a; + int alpha_chan; + for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) { + if (format_swiz[alpha_chan] == 3) + break; + } + if (alpha_chan != 4) { + nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8); + dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color, + shift), imm_0xff)); + } else { + dst_a = nir_imm_int(b, ~0); + } + + nir_ssa_def *src_factor = vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->rgb_src_factor, + alpha_chan); + nir_ssa_def *dst_factor = vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->rgb_dst_factor, + alpha_chan); + + if (alpha_chan != 4 && + blend->alpha_src_factor != blend->rgb_src_factor) { + nir_ssa_def *src_alpha_factor = + vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->alpha_src_factor, + alpha_chan); + src_factor = vc4_nir_set_packed_chan(b, src_factor, + src_alpha_factor, + alpha_chan); + } + if (alpha_chan != 4 && + blend->alpha_dst_factor != blend->rgb_dst_factor) { + nir_ssa_def *dst_alpha_factor = + vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->alpha_dst_factor, + alpha_chan); + dst_factor = vc4_nir_set_packed_chan(b, dst_factor, + dst_alpha_factor, 
+ alpha_chan); + } + nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor); + nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor); + + nir_ssa_def *result = + vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func); + if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) { + nir_ssa_def *result_a = vc4_blend_func_i(b, + src_blend, + dst_blend, + blend->alpha_func); + result = vc4_nir_set_packed_chan(b, result, result_a, + alpha_chan); } + return result; } static nir_ssa_def * @@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b, nir_builder_instr_insert(b, &discard->instr); } +static nir_ssa_def * +vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b, + nir_ssa_def **colors) +{ + enum pipe_format color_format = c->fs_key->color_format; + const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + + nir_ssa_def *swizzled[4]; + for (int i = 0; i < 4; i++) { + swizzled[i] = vc4_nir_get_swizzled_channel(b, colors, + format_swiz[i]); + } + + return nir_pack_unorm_4x8(b, + nir_vec4(b, + swizzled[0], swizzled[1], + swizzled[2], swizzled[3])); + +} + static void vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { enum pipe_format color_format = c->fs_key->color_format; const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + bool srgb = util_format_is_srgb(color_format); /* Pull out the float src/dst color components. */ nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b); @@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false); } - /* Unswizzle the destination color. */ - nir_ssa_def *dst_color[4]; - for (unsigned i = 0; i < 4; i++) { - dst_color[i] = vc4_nir_get_swizzled_channel(b, - unpacked_dst_color, - format_swiz[i]); - } - vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); - /* Turn dst color to linear. 
*/ - if (util_format_is_srgb(color_format)) { + nir_ssa_def *packed_color; + if (srgb) { + /* Unswizzle the destination color. */ + nir_ssa_def *dst_color[4]; + for (unsigned i = 0; i < 4; i++) { + dst_color[i] = vc4_nir_get_swizzled_channel(b, + unpacked_dst_color, + format_swiz[i]); + } + + /* Turn dst color to linear. */ for (int i = 0; i < 3; i++) dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); - } - nir_ssa_def *blend_color[4]; - vc4_do_blending(c, b, blend_color, src_color, dst_color); + nir_ssa_def *blend_color[4]; + vc4_do_blending_f(c, b, blend_color, src_color, dst_color); - /* sRGB encode the output color */ - if (util_format_is_srgb(color_format)) { + /* sRGB encode the output color */ for (int i = 0; i < 3; i++) blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); - } - nir_ssa_def *swizzled_outputs[4]; - for (int i = 0; i < 4; i++) { - swizzled_outputs[i] = - vc4_nir_get_swizzled_channel(b, blend_color, - format_swiz[i]); - } + packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); + } else { + nir_ssa_def *packed_src_color = + vc4_nir_swizzle_and_pack(c, b, src_color); - nir_ssa_def *packed_color = - nir_pack_unorm_4x8(b, - nir_vec4(b, - swizzled_outputs[0], - swizzled_outputs[1], - swizzled_outputs[2], - swizzled_outputs[3])); + packed_color = + vc4_do_blending_i(c, b, + packed_src_color, packed_dst_color, + src_color[3]); + } packed_color = vc4_logicop(b, c->fs_key->logicop_func, packed_color, packed_dst_color); diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index caf706aa2a6..7ea263afb68 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -406,6 +406,7 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, case nir_intrinsic_load_uniform: case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_user_clip_plane: vc4_nir_lower_uniform(c, b, intr); break; diff --git 
a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 5b435832b92..f1bab810eff 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -64,6 +64,7 @@ is_constant_value(struct vc4_compile *c, struct qreg reg, uint32_t val) { if (reg.file == QFILE_UNIF && + !reg.pack && c->uniform_contents[reg.index] == QUNIFORM_CONSTANT && c->uniform_data[reg.index] == val) { return true; diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index fd2539aed95..0eee5c34e1d 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -41,34 +41,77 @@ qir_opt_copy_propagation(struct vc4_compile *c) bool debug = false; list_for_each_entry(struct qinst, inst, &c->instructions, link) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - int index = inst->src[i].index; - if (inst->src[i].file == QFILE_TEMP && - c->defs[index] && - c->defs[index]->op == QOP_MOV && - (c->defs[index]->src[0].file == QFILE_TEMP || - c->defs[index]->src[0].file == QFILE_UNIF)) { - /* If it has a pack, it shouldn't be an SSA - * def. + int nsrc = qir_get_op_nsrc(inst->op); + for (int i = 0; i < nsrc; i++) { + if (inst->src[i].file != QFILE_TEMP) + continue; + + struct qinst *mov = c->defs[inst->src[i].index]; + if (!mov || + (mov->op != QOP_MOV && + mov->op != QOP_FMOV && + mov->op != QOP_MMOV)) { + continue; + } + + if (mov->src[0].file != QFILE_TEMP && + mov->src[0].file != QFILE_UNIF) { + continue; + } + + if (mov->dst.pack) + continue; + + uint8_t unpack; + if (mov->src[0].pack) { + /* Make sure that the meaning of the unpack + * would be the same between the two + * instructions. 
*/ - assert(!c->defs[index]->dst.pack); + if (qir_is_float_input(inst) != + qir_is_float_input(mov)) { + continue; + } - if (debug) { - fprintf(stderr, "Copy propagate: "); - qir_dump_inst(c, inst); - fprintf(stderr, "\n"); + /* There's only one unpack field, so make sure + * this instruction doesn't already use it. + */ + bool already_has_unpack = false; + for (int j = 0; j < nsrc; j++) { + if (inst->src[j].pack) + already_has_unpack = true; } + if (already_has_unpack) + continue; - inst->src[i] = c->defs[index]->src[0]; + /* A destination pack requires the PM bit to + * be set to a specific value already, which + * may be different from ours. + */ + if (inst->dst.pack) + continue; - if (debug) { - fprintf(stderr, "to: "); - qir_dump_inst(c, inst); - fprintf(stderr, "\n"); - } + unpack = mov->src[0].pack; + } else { + unpack = inst->src[i].pack; + } - progress = true; + if (debug) { + fprintf(stderr, "Copy propagate: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); } + + inst->src[i] = mov->src[0]; + inst->src[i].pack = unpack; + + if (debug) { + fprintf(stderr, "to: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); + } + + progress = true; } } return progress; diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c index 0e5480ea781..8b4d429074c 100644 --- a/src/gallium/drivers/vc4/vc4_opt_cse.c +++ b/src/gallium/drivers/vc4/vc4_opt_cse.c @@ -65,6 +65,7 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht, struct qinst *inst, uint32_t sf_count) { if (inst->dst.file != QFILE_TEMP || + !c->defs[inst->dst.index] || inst->op == QOP_MOV || qir_get_op_nsrc(inst->op) > 4) { return NULL; diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index d6e98f0aebf..e61562171aa 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -56,6 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c) 
struct qreg src = qir_follow_movs(c, inst->src[i]); if (src.file != QFILE_UNIF || + src.pack || c->uniform_contents[src.index] != QUNIFORM_CONSTANT) { continue; @@ -72,9 +73,6 @@ qir_opt_small_immediates(struct vc4_compile *c) continue; } - if (qir_src_needs_a_file(inst)) - continue; - uint32_t imm = c->uniform_data[src.index]; uint32_t small_imm = qpu_encode_small_immediate(imm); if (small_imm == ~0) diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c index f2cdf8f694f..73ded766db9 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c @@ -58,7 +58,7 @@ qir_opt_vpm_writes(struct vc4_compile *c) } for (int i = 0; i < vpm_write_count; i++) { - if (vpm_writes[i]->op != QOP_MOV || + if (!qir_is_raw_mov(vpm_writes[i]) || vpm_writes[i]->src[0].file != QFILE_TEMP) { continue; } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 6e9ec6530c6..a48dad804e2 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -738,6 +738,20 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); } + /* If the pack is replicating the same channel 4 times, use the 8888 + * pack flag. This is common for blending using the alpha + * channel. 
+ */ + if (instr->src[0].swizzle[0] == instr->src[0].swizzle[1] && + instr->src[0].swizzle[0] == instr->src[0].swizzle[2] && + instr->src[0].swizzle[0] == instr->src[0].swizzle[3]) { + struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); + *dest = qir_PACK_8888_F(c, + ntq_get_src(c, instr->src[0].src, + instr->src[0].swizzle[0])); + return; + } + for (int i = 0; i < 4; i++) { int swiz = instr->src[0].swizzle[i]; struct qreg src; @@ -1040,41 +1054,37 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) *dest = ntq_emit_ubfe(c, src[0], src[1], src[2]); break; - default: - fprintf(stderr, "unknown NIR ALU inst: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); - } -} + case nir_op_usadd_4x8: + *dest = qir_V8ADDS(c, src[0], src[1]); + break; -static void -clip_distance_discard(struct vc4_compile *c) -{ - for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) { - if (!(c->key->ucp_enables & (1 << i))) - continue; + case nir_op_ussub_4x8: + *dest = qir_V8SUBS(c, src[0], src[1]); + break; - struct qreg dist = - emit_fragment_varying(c, - VARYING_SLOT_CLIP_DIST0 + (i / 4), - i % 4); + case nir_op_umin_4x8: + *dest = qir_V8MIN(c, src[0], src[1]); + break; - qir_SF(c, dist); + case nir_op_umax_4x8: + *dest = qir_V8MAX(c, src[0], src[1]); + break; - if (c->discard.file == QFILE_NULL) - c->discard = qir_uniform_ui(c, 0); + case nir_op_umul_unorm_4x8: + *dest = qir_V8MULD(c, src[0], src[1]); + break; - c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0), - c->discard); + default: + fprintf(stderr, "unknown NIR ALU inst: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); } } static void emit_frag_end(struct vc4_compile *c) { - clip_distance_discard(c); - struct qreg color; if (c->output_color_index != -1) { color = c->outputs[c->output_color_index]; @@ -1190,45 +1200,6 @@ emit_stub_vpm_read(struct vc4_compile *c) } static void -emit_ucp_clipdistance(struct vc4_compile *c) -{ - unsigned cv; - if 
(c->output_clipvertex_index != -1) - cv = c->output_clipvertex_index; - else if (c->output_position_index != -1) - cv = c->output_position_index; - else - return; - - for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) { - if (!(c->key->ucp_enables & (1 << plane))) - continue; - - /* Pick the next outputs[] that hasn't been written to, since - * there are no other program writes left to be processed at - * this point. If something had been declared but not written - * (like a w component), we'll just smash over the top of it. - */ - uint32_t output_index = c->num_outputs++; - add_output(c, output_index, - VARYING_SLOT_CLIP_DIST0 + plane / 4, - plane % 4); - - - struct qreg dist = qir_uniform_f(c, 0.0); - for (int i = 0; i < 4; i++) { - struct qreg pos_chan = c->outputs[cv + i]; - struct qreg ucp = - qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, - plane * 4 + i); - dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp)); - } - - c->outputs[output_index] = dist; - } -} - -static void emit_vert_end(struct vc4_compile *c, struct vc4_varying_slot *fs_inputs, uint32_t num_fs_inputs) @@ -1236,7 +1207,6 @@ emit_vert_end(struct vc4_compile *c, struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]); emit_stub_vpm_read(c); - emit_ucp_clipdistance(c); emit_scaled_viewport_write(c, rcp_w); emit_zs_write(c, rcp_w); @@ -1391,9 +1361,6 @@ ntq_setup_outputs(struct vc4_compile *c) case VARYING_SLOT_POS: c->output_position_index = loc; break; - case VARYING_SLOT_CLIP_VERTEX: - c->output_clipvertex_index = loc; - break; case VARYING_SLOT_PSIZ: c->output_point_size_index = loc; break; @@ -1486,6 +1453,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; + case nir_intrinsic_load_user_clip_plane: + *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, + instr->const_index[0]); + break; + case nir_intrinsic_load_input: assert(instr->num_components == 1); if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) { @@ -1683,10 +1655,18 @@ 
vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, c->s = tgsi_to_nir(tokens, &nir_options); nir_opt_global_to_local(c->s); nir_convert_to_ssa(c->s); + if (stage == QSTAGE_FRAG) vc4_nir_lower_blend(c); + if (c->fs_key && c->fs_key->light_twoside) nir_lower_two_sided_color(c->s); + + if (stage == QSTAGE_FRAG) + nir_lower_clip_fs(c->s, c->key->ucp_enables); + else + nir_lower_clip_vs(c->s, c->key->ucp_enables); + vc4_nir_lower_io(c); nir_lower_idiv(c->s); nir_lower_load_const_to_scalar(c->s); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index e385fbb65ae..7894b081b19 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -36,10 +36,17 @@ struct qir_op_info { static const struct qir_op_info qir_op_info[] = { [QOP_MOV] = { "mov", 1, 1 }, + [QOP_FMOV] = { "fmov", 1, 1 }, + [QOP_MMOV] = { "mmov", 1, 1 }, [QOP_FADD] = { "fadd", 1, 2 }, [QOP_FSUB] = { "fsub", 1, 2 }, [QOP_FMUL] = { "fmul", 1, 2 }, [QOP_MUL24] = { "mul24", 1, 2 }, + [QOP_V8MULD] = {"v8muld", 1, 2 }, + [QOP_V8MIN] = {"v8min", 1, 2 }, + [QOP_V8MAX] = {"v8max", 1, 2 }, + [QOP_V8ADDS] = {"v8adds", 1, 2 }, + [QOP_V8SUBS] = {"v8subs", 1, 2 }, [QOP_FMIN] = { "fmin", 1, 2 }, [QOP_FMAX] = { "fmax", 1, 2 }, [QOP_FMINABS] = { "fminabs", 1, 2 }, @@ -71,11 +78,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, [QOP_LOG2] = { "log2", 1, 2, false, true }, - [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 }, - [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 }, - [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 }, - [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 }, - [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, @@ -95,18 +97,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TEX_B] = { "tex_b", 0, 2 }, [QOP_TEX_DIRECT] = { 
"tex_direct", 0, 2 }, [QOP_TEX_RESULT] = { "tex_result", 1, 0, true }, - [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 }, - [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 }, - [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 }, - [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 }, - [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 }, - [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 }, - [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 }, - [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 }, - [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 }, - [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 }, - [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 }, - [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 }, }; static const char * @@ -171,8 +161,14 @@ bool qir_is_mul(struct qinst *inst) { switch (inst->op) { + case QOP_MMOV: case QOP_FMUL: case QOP_MUL24: + case QOP_V8MULD: + case QOP_V8MIN: + case QOP_V8MAX: + case QOP_V8ADDS: + case QOP_V8SUBS: return true; default: return false; @@ -180,6 +176,35 @@ qir_is_mul(struct qinst *inst) } bool +qir_is_float_input(struct qinst *inst) +{ + switch (inst->op) { + case QOP_FMOV: + case QOP_FMUL: + case QOP_FADD: + case QOP_FSUB: + case QOP_FMIN: + case QOP_FMAX: + case QOP_FMINABS: + case QOP_FMAXABS: + case QOP_FTOI: + return true; + default: + return false; + } +} + +bool +qir_is_raw_mov(struct qinst *inst) +{ + return ((inst->op == QOP_MOV || + inst->op == QOP_FMOV || + inst->op == QOP_MMOV) && + !inst->dst.pack && + !inst->src[0].pack); +} + +bool qir_is_tex(struct qinst *inst) { return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT; @@ -204,28 +229,6 @@ qir_depends_on_flags(struct qinst *inst) } bool -qir_src_needs_a_file(struct qinst *inst) -{ - switch (inst->op) { - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - case QOP_UNPACK_16A_F: - case QOP_UNPACK_16B_F: - case QOP_UNPACK_8A_I: - case QOP_UNPACK_8B_I: - case QOP_UNPACK_8C_I: - case QOP_UNPACK_8D_I: - case QOP_UNPACK_16A_I: - case QOP_UNPACK_16B_I: - return true; - default: - return 
false; - } -} - -bool qir_writes_r4(struct qinst *inst) { switch (inst->op) { @@ -295,6 +298,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst) for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { fprintf(stderr, ", "); qir_print_reg(c, inst->src[i], false); + vc4_qpu_disasm_unpack(stderr, inst->src[i].pack); } } @@ -385,7 +389,6 @@ qir_compile_init(void) list_inithead(&c->instructions); c->output_position_index = -1; - c->output_clipvertex_index = -1; c->output_color_index = -1; c->output_point_size_index = -1; @@ -411,7 +414,8 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg) { while (reg.file == QFILE_TEMP && c->defs[reg.index] && - c->defs[reg.index]->op == QOP_MOV) { + c->defs[reg.index]->op == QOP_MOV && + !c->defs[reg.index]->dst.pack) { reg = c->defs[reg.index]->src[0]; } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index ddde96db6b4..a92ad93ee07 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -37,6 +37,7 @@ #include "util/u_math.h" #include "vc4_screen.h" +#include "vc4_qpu_defines.h" #include "pipe/p_state.h" struct nir_builder; @@ -64,9 +65,16 @@ struct qreg { enum qop { QOP_UNDEF, QOP_MOV, + QOP_FMOV, + QOP_MMOV, QOP_FADD, QOP_FSUB, QOP_FMUL, + QOP_V8MULD, + QOP_V8MIN, + QOP_V8MAX, + QOP_V8ADDS, + QOP_V8SUBS, QOP_MUL24, QOP_FMIN, QOP_FMAX, @@ -105,11 +113,6 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_PACK_8888_F, - QOP_PACK_8A_F, - QOP_PACK_8B_F, - QOP_PACK_8C_F, - QOP_PACK_8D_F, QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, QOP_TLB_Z_WRITE, @@ -123,20 +126,6 @@ enum qop { QOP_FRAG_W, QOP_FRAG_REV_FLAG, - QOP_UNPACK_8A_F, - QOP_UNPACK_8B_F, - QOP_UNPACK_8C_F, - QOP_UNPACK_8D_F, - QOP_UNPACK_16A_F, - QOP_UNPACK_16B_F, - - QOP_UNPACK_8A_I, - QOP_UNPACK_8B_I, - QOP_UNPACK_8C_I, - QOP_UNPACK_8D_I, - QOP_UNPACK_16A_I, - QOP_UNPACK_16B_I, - /** Texture x coordinate parameter write */ QOP_TEX_S, /** Texture y coordinate parameter write */ @@ 
-248,6 +237,8 @@ enum quniform_contents { QUNIFORM_BLEND_CONST_COLOR_Y, QUNIFORM_BLEND_CONST_COLOR_Z, QUNIFORM_BLEND_CONST_COLOR_W, + QUNIFORM_BLEND_CONST_COLOR_RGBA, + QUNIFORM_BLEND_CONST_COLOR_AAAA, QUNIFORM_STENCIL, @@ -399,7 +390,6 @@ struct vc4_compile { uint32_t num_outputs; uint32_t num_texture_samples; uint32_t output_position_index; - uint32_t output_clipvertex_index; uint32_t output_color_index; uint32_t output_point_size_index; @@ -457,10 +447,11 @@ bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); bool qir_is_multi_instruction(struct qinst *inst); bool qir_is_mul(struct qinst *inst); +bool qir_is_raw_mov(struct qinst *inst); bool qir_is_tex(struct qinst *inst); +bool qir_is_float_input(struct qinst *inst); bool qir_depends_on_flags(struct qinst *inst); bool qir_writes_r4(struct qinst *inst); -bool qir_src_needs_a_file(struct qinst *inst); struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); void qir_dump(struct vc4_compile *c); @@ -561,9 +552,16 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \ } QIR_ALU1(MOV) +QIR_ALU1(FMOV) +QIR_ALU1(MMOV) QIR_ALU2(FADD) QIR_ALU2(FSUB) QIR_ALU2(FMUL) +QIR_ALU2(V8MULD) +QIR_ALU2(V8MIN) +QIR_ALU2(V8MAX) +QIR_ALU2(V8ADDS) +QIR_ALU2(V8SUBS) QIR_ALU2(MUL24) QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) @@ -596,11 +594,6 @@ QIR_ALU1(RCP) QIR_ALU1(RSQ) QIR_ALU1(EXP2) QIR_ALU1(LOG2) -QIR_ALU1(PACK_8888_F) -QIR_PACK(PACK_8A_F) -QIR_PACK(PACK_8B_F) -QIR_PACK(PACK_8C_F) -QIR_PACK(PACK_8D_F) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) @@ -622,41 +615,50 @@ QIR_NODST_1(TLB_STENCIL_SETUP) static inline struct qreg qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); + struct qreg t = qir_FMOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; return t; } static inline 
struct qreg qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); + struct qreg t = qir_MOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; return t; } static inline struct qreg qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); + struct qreg t = qir_FMOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; return t; } static inline struct qreg qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); + struct qreg t = qir_MOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; return t; } -static inline struct qreg +static inline void qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan) { - qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef)); + assert(!dest.pack); + dest.pack = QPU_PACK_MUL_8A + chan; + qir_emit(c, qir_inst(QOP_MMOV, dest, val, c->undef)); if (dest.file == QFILE_TEMP) c->defs[dest.index] = NULL; +} + +static inline struct qreg +qir_PACK_8888_F(struct vc4_compile *c, struct qreg val) +{ + struct qreg dest = qir_MMOV(c, val); + c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888; return dest; } diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index 0719d2828b5..866ca5c1300 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -213,6 +213,9 @@ void vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack); void +vc4_qpu_disasm_unpack(FILE *out, uint32_t pack); + +void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst); #endif /* VC4_QPU_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h index eb3dfb33827..626dc3be6be 100644 --- 
a/src/gallium/drivers/vc4/vc4_qpu_defines.h +++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h @@ -200,8 +200,8 @@ enum qpu_pack_a { enum qpu_unpack { QPU_UNPACK_NOP, - QPU_UNPACK_16A_TO_F32, - QPU_UNPACK_16B_TO_F32, + QPU_UNPACK_16A, + QPU_UNPACK_16B, QPU_UNPACK_8D_REP, QPU_UNPACK_8A, QPU_UNPACK_8B, diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c index 0879787ec03..c46fd1a0e3f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c +++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c @@ -98,8 +98,8 @@ static const char *qpu_pack_mul[] = { */ static const char *qpu_unpack[] = { [QPU_UNPACK_NOP] = "", - [QPU_UNPACK_16A_TO_F32] = "16a", - [QPU_UNPACK_16B_TO_F32] = "16b", + [QPU_UNPACK_16A] = "16a", + [QPU_UNPACK_16B] = "16b", [QPU_UNPACK_8D_REP] = "8d_rep", [QPU_UNPACK_8A] = "8a", [QPU_UNPACK_8B] = "8b", @@ -257,6 +257,13 @@ vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack) fprintf(out, "%s", DESC(qpu_pack_a, pack)); } +void +vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack) +{ + if (unpack != QPU_UNPACK_NOP) + fprintf(out, ".%s", DESC(qpu_unpack, unpack)); +} + static void print_alu_dst(uint64_t inst, bool is_mul) { @@ -315,10 +322,9 @@ print_alu_src(uint64_t inst, uint32_t mux) fprintf(stderr, "%s", DESC(special_read_b, raddr - 32)); } - if (unpack != QPU_UNPACK_NOP && - ((mux == QPU_MUX_A && !(inst & QPU_PM)) || + if (((mux == QPU_MUX_A && !(inst & QPU_PM)) || (mux == QPU_MUX_R4 && (inst & QPU_PM)))) { - fprintf(stderr, ".%s", DESC(qpu_unpack, unpack)); + vc4_qpu_disasm_unpack(stderr, unpack); } } diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index adf3a8b3658..133e1385178 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src) static void fixup_raddr_conflict(struct vc4_compile *c, struct qpu_reg dst, - struct qpu_reg *src0, struct qpu_reg *src1) + struct qpu_reg *src0, struct qpu_reg *src1, 
+ struct qinst *inst, uint64_t *unpack) { uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux; uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux; @@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c, return; if (mux0 == QPU_MUX_A) { - queue(c, qpu_a_MOV(qpu_rb(31), *src0)); + /* Make sure we use the same type of MOV as the instruction, + * in case of unpacks. + */ + if (qir_is_float_input(inst)) + queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0)); + else + queue(c, qpu_a_MOV(qpu_rb(31), *src0)); + + /* If we had an unpack on this A-file source, we need to put + * it into this MOV, not into the later move from regfile B. + */ + if (inst->src[0].pack) { + *last_inst(c) |= *unpack; + *unpack = 0; + } *src0 = qpu_rb(31); } else { queue(c, qpu_a_MOV(qpu_ra(31), *src0)); @@ -125,6 +140,27 @@ fixup_raddr_conflict(struct vc4_compile *c, } } +static void +set_last_dst_pack(struct vc4_compile *c, struct qinst *inst) +{ + bool had_pm = *last_inst(c) & QPU_PM; + bool had_ws = *last_inst(c) & QPU_WS; + uint32_t unpack = QPU_GET_FIELD(*last_inst(c), QPU_UNPACK); + + if (!inst->dst.pack) + return; + + *last_inst(c) |= QPU_SET_FIELD(inst->dst.pack, QPU_PACK); + + if (qir_is_mul(inst)) { + assert(!unpack || had_pm); + *last_inst(c) |= QPU_PM; + } else { + assert(!unpack || !had_pm); + assert(!had_ws); /* dst must be a-file to pack. */ + } +} + void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { @@ -134,15 +170,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) uint32_t vpm_read_fifo_count = 0; uint32_t vpm_read_offset = 0; int last_vpm_read_index = -1; - /* Map from the QIR ops enum order to QPU unpack bits. 
*/ - static const uint32_t unpack_map[] = { - QPU_UNPACK_8A, - QPU_UNPACK_8B, - QPU_UNPACK_8C, - QPU_UNPACK_8D, - QPU_UNPACK_16A_TO_F32, - QPU_UNPACK_16B_TO_F32, - }; list_inithead(&c->qpu_inst_list); @@ -203,9 +230,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) A(NOT), M(FMUL), + M(V8MULD), + M(V8MIN), + M(V8MAX), + M(V8ADDS), + M(V8SUBS), M(MUL24), + + /* If we replicate src[0] out to src[1], this works + * out the same as a MOV. + */ + [QOP_MOV] = { QPU_A_OR }, + [QOP_FMOV] = { QPU_A_FMAX }, + [QOP_MMOV] = { QPU_M_V8MIN }, }; + uint64_t unpack = 0; struct qpu_reg src[4]; for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) { int index = qinst->src[i].index; @@ -215,6 +255,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QFILE_TEMP: src[i] = temp_registers[index]; + if (qinst->src[i].pack) { + assert(!unpack || + unpack == qinst->src[i].pack); + unpack = QPU_SET_FIELD(qinst->src[i].pack, + QPU_UNPACK); + if (src[i].mux == QPU_MUX_R4) + unpack |= QPU_PM; + } break; case QFILE_UNIF: src[i] = qpu_unif(); @@ -259,19 +307,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) } switch (qinst->op) { - case QOP_MOV: - /* Skip emitting the MOV if it's a no-op. 
*/ - if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B || - dst.mux != src[0].mux || dst.addr != src[0].addr) { - queue(c, qpu_a_MOV(dst, src[0])); - } - break; - case QOP_SEL_X_0_ZS: case QOP_SEL_X_0_ZC: case QOP_SEL_X_0_NS: case QOP_SEL_X_0_NC: - queue(c, qpu_a_MOV(dst, src[0])); + queue(c, qpu_a_MOV(dst, src[0]) | unpack); set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + QPU_COND_ZS); @@ -285,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: queue(c, qpu_a_MOV(dst, src[0])); + if (qinst->src[0].pack) + *(last_inst(c)) |= unpack; set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS + QPU_COND_ZS); queue(c, qpu_a_MOV(dst, src[1])); + if (qinst->src[1].pack) + *(last_inst(c)) |= unpack; set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^ 1) + QPU_COND_ZS); @@ -301,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) switch (qinst->op) { case QOP_RCP: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP), - src[0])); + src[0]) | unpack); break; case QOP_RSQ: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT), - src[0])); + src[0]) | unpack); break; case QOP_EXP2: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP), - src[0])); + src[0]) | unpack); break; case QOP_LOG2: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG), - src[0])); + src[0]) | unpack); break; default: abort(); @@ -324,25 +368,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; - case QOP_PACK_8888_F: - queue(c, qpu_m_MOV(dst, src[0])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888, - QPU_PACK); - break; - - case QOP_PACK_8A_F: - case QOP_PACK_8B_F: - case QOP_PACK_8C_F: - case QOP_PACK_8D_F: - queue(c, - qpu_m_MOV(dst, src[0]) | - QPU_PM | - QPU_SET_FIELD(QPU_PACK_MUL_8A + - qinst->op - QOP_PACK_8A_F, - QPU_PACK)); - break; - case QOP_FRAG_X: queue(c, qpu_a_ITOF(dst, qpu_ra(QPU_R_XY_PIXEL_COORD))); @@ -367,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) 
case QOP_TLB_DISCARD_SETUP: discard = true; - queue(c, qpu_a_MOV(src[0], src[0])); + queue(c, qpu_a_MOV(src[0], src[0]) | unpack); *last_inst(c) |= QPU_SF; break; case QOP_TLB_STENCIL_SETUP: - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0])); + assert(!unpack); + queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), + src[0]) | unpack); break; case QOP_TLB_Z_WRITE: - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0])); + queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), + src[0]) | unpack); if (discard) { set_last_cond_add(c, QPU_COND_ZS); } @@ -392,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QOP_TLB_COLOR_WRITE: - queue(c, qpu_a_MOV(qpu_tlbc(), src[0])); + queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack); if (discard) { set_last_cond_add(c, QPU_COND_ZS); } break; case QOP_VARY_ADD_C: - queue(c, qpu_a_FADD(dst, src[0], qpu_r5())); + queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack); break; case QOP_TEX_S: @@ -408,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_TEX_B: queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S + (qinst->op - QOP_TEX_S)), - src[0])); + src[0]) | unpack); break; case QOP_TEX_DIRECT: - fixup_raddr_conflict(c, dst, &src[0], &src[1]); - queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1])); + fixup_raddr_conflict(c, dst, &src[0], &src[1], + qinst, &unpack); + queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), + src[0], src[1]) | unpack); break; case QOP_TEX_RESULT: @@ -424,67 +454,16 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_MOV(dst, qpu_r4())); break; - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - case QOP_UNPACK_16A_F: - case QOP_UNPACK_16B_F: { - if (src[0].mux == QPU_MUX_R4) { - queue(c, qpu_a_MOV(dst, src[0])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A + - (qinst->op - - QOP_UNPACK_8A_F), - QPU_UNPACK); - } else { - assert(src[0].mux == QPU_MUX_A); 
- - /* Since we're setting the pack bits, if the - * destination is in A it would get re-packed. - */ - queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ? - qpu_rb(31) : dst), - src[0], src[0])); - *last_inst(c) |= - QPU_SET_FIELD(unpack_map[qinst->op - - QOP_UNPACK_8A_F], - QPU_UNPACK); - - if (dst.mux == QPU_MUX_A) { - queue(c, qpu_a_MOV(dst, qpu_rb(31))); - } - } - } - break; - - case QOP_UNPACK_8A_I: - case QOP_UNPACK_8B_I: - case QOP_UNPACK_8C_I: - case QOP_UNPACK_8D_I: - case QOP_UNPACK_16A_I: - case QOP_UNPACK_16B_I: { - assert(src[0].mux == QPU_MUX_A); - - /* Since we're setting the pack bits, if the - * destination is in A it would get re-packed. - */ - queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ? - qpu_rb(31) : dst), src[0])); - *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op - - QOP_UNPACK_8A_I], - QPU_UNPACK); - - if (dst.mux == QPU_MUX_A) { - queue(c, qpu_a_MOV(dst, qpu_rb(31))); - } - } - break; - default: assert(qinst->op < ARRAY_SIZE(translate)); assert(translate[qinst->op].op != 0); /* NOPs */ + /* Skip emitting the MOV if it's a no-op. */ + if (qir_is_raw_mov(qinst) && + dst.mux == src[0].mux && dst.addr == src[0].addr) { + break; + } + /* If we have only one source, put it in the second * argument slot as well so that we don't take up * another raddr just to get unused data. 
@@ -492,27 +471,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) if (qir_get_op_nsrc(qinst->op) == 1) src[1] = src[0]; - fixup_raddr_conflict(c, dst, &src[0], &src[1]); + fixup_raddr_conflict(c, dst, &src[0], &src[1], + qinst, &unpack); if (qir_is_mul(qinst)) { queue(c, qpu_m_alu2(translate[qinst->op].op, dst, - src[0], src[1])); - if (qinst->dst.pack) { - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, - QPU_PACK); - } + src[0], src[1]) | unpack); } else { queue(c, qpu_a_alu2(translate[qinst->op].op, dst, - src[0], src[1])); - if (qinst->dst.pack) { - assert(dst.mux == QPU_MUX_A); - *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, - QPU_PACK); - } + src[0], src[1]) | unpack); } + set_last_dst_pack(c, qinst); break; } diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 3ced50f3a44..bca36c3e7f4 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -282,23 +282,23 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) class_bits[inst->dst.index] &= CLASS_BIT_A; } - if (qir_src_needs_a_file(inst)) { - switch (inst->op) { - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - /* Special case: these can be done as R4 - * unpacks, as well. - */ - class_bits[inst->src[0].index] &= (CLASS_BIT_A | - CLASS_BIT_R4); - break; - default: - class_bits[inst->src[0].index] &= CLASS_BIT_A; - break; + /* Apply restrictions for src unpacks. The integer unpacks + * can only be done from regfile A, while float unpacks can be + * either A or R4. 
+ */ + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file == QFILE_TEMP && + inst->src[i].pack) { + if (qir_is_float_input(inst)) { + class_bits[inst->src[i].index] &= + CLASS_BIT_A | CLASS_BIT_R4; + } else { + class_bits[inst->src[i].index] &= + CLASS_BIT_A; + } } } + ip++; } diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 5d5166fd818..122bda0bac6 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -667,11 +667,16 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx, shadow_offset, &shadow_rsc, &data); uint16_t *dst = data; - struct pipe_transfer *src_transfer; - uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, - ib->offset, - count * 4, - PIPE_TRANSFER_READ, &src_transfer); + struct pipe_transfer *src_transfer = NULL; + uint32_t *src; + if (ib->user_buffer) { + src = ib->user_buffer; + } else { + src = pipe_buffer_map_range(pctx, &orig->base.b, + ib->offset, + count * 4, + PIPE_TRANSFER_READ, &src_transfer); + } for (int i = 0; i < count; i++) { uint32_t src_index = src[i]; @@ -679,7 +684,8 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx, dst[i] = src_index; } - pctx->transfer_unmap(pctx, src_transfer); + if (src_transfer) + pctx->transfer_unmap(pctx, src_transfer); return shadow_rsc; } diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 774ec095652..bb867611804 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -94,6 +94,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_USER_INDEX_BUFFERS: return 1; /* lying for GL 2.0 */ @@ -152,7 +153,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: 
case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: @@ -183,6 +183,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 76980ca32af..10dabd09f5e 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -32,6 +32,11 @@ #include "vc4_simulator_validate.h" #include "simpenrose/simpenrose.h" +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. + */ +#define BO_SENTINEL 0xfedcba98 + #define OVERFLOW_SIZE (32 * 1024 * 1024) static struct drm_gem_cma_object * @@ -49,10 +54,12 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); - dev->simulator_mem_next += size; + dev->simulator_mem_next += size + sizeof(uint32_t); dev->simulator_mem_next = align(dev->simulator_mem_next, 4096); assert(dev->simulator_mem_next <= screen->simulator_mem_size); + *(uint32_t *)(obj->vaddr + bo->size) = BO_SENTINEL; + return obj; } @@ -109,6 +116,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec) struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); struct vc4_bo *bo = drm_bo->bo; + assert(*(uint32_t *)(obj->vaddr + bo->size) == BO_SENTINEL); memcpy(bo->map, obj->vaddr, bo->size); if (drm_bo->validated_shader) { @@ -197,6 +205,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list, unref_head) { list_del(&bo->unref_head); + assert(*(uint32_t 
*)(bo->base.vaddr + bo->bo->size) == + BO_SENTINEL); vc4_bo_unreference(&bo->bo); free(bo); } diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 8a759c2ca4c..78aa344ab1d 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx, const struct pipe_blend_color *blend_color) { struct vc4_context *vc4 = vc4_context(pctx); - vc4->blend_color = *blend_color; + vc4->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) + vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]); vc4->dirty |= VC4_DIRTY_BLEND_COLOR; } @@ -303,10 +305,10 @@ vc4_set_index_buffer(struct pipe_context *pctx, struct vc4_context *vc4 = vc4_context(pctx); if (ib) { - assert(!ib->user_buffer); pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); vc4->indexbuf.index_size = ib->index_size; vc4->indexbuf.offset = ib->offset; + vc4->indexbuf.user_buffer = ib->user_buffer; } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL); } diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index 85d6998205e..f5ad481f186 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, case QUNIFORM_BLEND_CONST_COLOR_Z: case QUNIFORM_BLEND_CONST_COLOR_W: cl_aligned_f(&uniforms, - CLAMP(vc4->blend_color.color[uinfo->contents[i] - - QUNIFORM_BLEND_CONST_COLOR_X], + CLAMP(vc4->blend_color.f.color[uinfo->contents[i] - + QUNIFORM_BLEND_CONST_COLOR_X], 0, 1)); break; + case QUNIFORM_BLEND_CONST_COLOR_RGBA: { + const uint8_t *format_swiz = + vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format); + uint32_t color = 0; + for (int i = 0; i < 4; i++) { + if (format_swiz[i] >= 4) + continue; + + color |= (vc4->blend_color.ub[format_swiz[i]] << + (i * 8)); + } + cl_aligned_u32(&uniforms, 
color); + break; + } + + case QUNIFORM_BLEND_CONST_COLOR_AAAA: { + uint8_t a = vc4->blend_color.ub[3]; + cl_aligned_u32(&uniforms, ((a) | + (a << 8) | + (a << 16) | + (a << 24))); + break; + } + case QUNIFORM_STENCIL: cl_aligned_u32(&uniforms, vc4->zsa->stencil_uniforms[uinfo->data[i]] | @@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader) case QUNIFORM_BLEND_CONST_COLOR_Y: case QUNIFORM_BLEND_CONST_COLOR_Z: case QUNIFORM_BLEND_CONST_COLOR_W: + case QUNIFORM_BLEND_CONST_COLOR_RGBA: + case QUNIFORM_BLEND_CONST_COLOR_AAAA: dirty |= VC4_DIRTY_BLEND_COLOR; break; diff --git a/src/gallium/drivers/virgl/Automake.inc b/src/gallium/drivers/virgl/Automake.inc new file mode 100644 index 00000000000..b05d3e314c8 --- /dev/null +++ b/src/gallium/drivers/virgl/Automake.inc @@ -0,0 +1,11 @@ +if HAVE_GALLIUM_VIRGL + +TARGET_DRIVERS += virtio_gpu +TARGET_CPPFLAGS += -DGALLIUM_VIRGL +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/drivers/virgl/libvirgl.la \ + $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la \ + $(top_builddir)/src/gallium/winsys/virgl/vtest/libvirglvtest.la \ + $(LIBDRM_LIBS) + +endif diff --git a/src/gallium/drivers/virgl/Makefile.am b/src/gallium/drivers/virgl/Makefile.am new file mode 100644 index 00000000000..82d9756143f --- /dev/null +++ b/src/gallium/drivers/virgl/Makefile.am @@ -0,0 +1,32 @@ +# Copyright © 2014, 2015 Red Hat. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CPPFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(LIBDRM_CFLAGS) + +noinst_LTLIBRARIES = libvirgl.la + +libvirgl_la_SOURCES = $(C_SOURCES) diff --git a/src/gallium/drivers/virgl/Makefile.sources b/src/gallium/drivers/virgl/Makefile.sources new file mode 100644 index 00000000000..c27d284e248 --- /dev/null +++ b/src/gallium/drivers/virgl/Makefile.sources @@ -0,0 +1,18 @@ +C_SOURCES := \ + virgl_buffer.c \ + virgl_context.c \ + virgl_context.h \ + virgl_encode.c \ + virgl_encode.h \ + virgl_hw.h \ + virgl_protocol.h \ + virgl_public.h \ + virgl_query.c \ + virgl_resource.c \ + virgl_resource.h \ + virgl_screen.c \ + virgl_screen.h \ + virgl_streamout.c \ + virgl_texture.c \ + virgl_tgsi.c \ + virgl_winsys.h diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c new file mode 100644 index 00000000000..ce19fb949d0 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_buffer.c @@ -0,0 +1,172 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "virgl_context.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static void virgl_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_buffer *vbuf = virgl_buffer(buf); + + util_range_destroy(&vbuf->valid_buffer_range); + vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); + FREE(vbuf); +} + +static void *virgl_buffer_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *vs = virgl_screen(ctx->screen); + struct virgl_buffer *vbuf = virgl_buffer(resource); + struct virgl_transfer *trans; + void *ptr; + bool readback; + uint32_t offset; + bool doflushwait = false; + + if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) + doflushwait = true; + else + doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); + + if (doflushwait) + ctx->flush(ctx, NULL, 0); + + trans = util_slab_alloc(&vctx->texture_transfer_pool); + if (trans == NULL) + return NULL; + + trans->base.resource = resource; + trans->base.level = level; + trans->base.usage = usage; + trans->base.box = *box; + trans->base.stride = 0; + trans->base.layer_stride = 0; + + offset = box->x; + + readback = virgl_res_needs_readback(vctx, &vbuf->base, usage); + if (readback) + vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level); + + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + doflushwait = 
true; + + if (doflushwait || readback) + vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + + ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res); + if (!ptr) { + return NULL; + } + + trans->offset = offset; + *transfer = &trans->base; + + return ptr + trans->offset; +} + +static void virgl_buffer_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_transfer *trans = virgl_transfer(transfer); + struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + + if (trans->base.usage & PIPE_TRANSFER_WRITE) { + if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + struct virgl_screen *vs = virgl_screen(ctx->screen); + vbuf->base.clean = FALSE; + vctx->num_transfers++; + vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, + &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); + + } + } + + util_slab_free(&vctx->texture_transfer_pool, trans); +} + +static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + + if (!vbuf->on_list) { + struct pipe_resource *res = NULL; + + list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); + vbuf->on_list = TRUE; + pipe_resource_reference(&res, &vbuf->base.u.b); + } + + util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, + transfer->box.x + box->x + box->width); + + vbuf->base.clean = FALSE; +} + +static const struct u_resource_vtbl virgl_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + virgl_buffer_destroy, /* resource_destroy */ + virgl_buffer_transfer_map, /* transfer_map */ + virgl_buffer_transfer_flush_region, /* transfer_flush_region */ + virgl_buffer_transfer_unmap, /* transfer_unmap */ + virgl_transfer_inline_write /* transfer_inline_write */ +}; + +struct 
pipe_resource *virgl_buffer_create(struct virgl_screen *vs, + const struct pipe_resource *template) +{ + struct virgl_buffer *buf; + uint32_t size; + uint32_t vbind; + buf = CALLOC_STRUCT(virgl_buffer); + buf->base.clean = TRUE; + buf->base.u.b = *template; + buf->base.u.b.screen = &vs->base; + buf->base.u.vtbl = &virgl_buffer_vtbl; + pipe_reference_init(&buf->base.u.b.reference, 1); + util_range_init(&buf->valid_buffer_range); + + vbind = pipe_to_virgl_bind(template->bind); + size = template->width0; + + buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size); + + util_range_set_empty(&buf->valid_buffer_range); + return &buf->base.u.b; +} diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c new file mode 100644 index 00000000000..e4f02ba1096 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -0,0 +1,963 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_shader_tokens.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_transfer.h" +#include "util/u_helpers.h" +#include "util/u_slab.h" +#include "util/u_upload_mgr.h" +#include "util/u_blitter.h" +#include "tgsi/tgsi_text.h" +#include "indices/u_primconvert.h" + +#include "pipebuffer/pb_buffer.h" +#include "state_tracker/graw.h" + +#include "virgl_encode.h" +#include "virgl_context.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static uint32_t next_handle; +uint32_t virgl_object_assign_handle(void) +{ + return ++next_handle; +} + +static void virgl_buffer_flush(struct virgl_context *vctx, + struct virgl_buffer *vbuf) +{ + struct virgl_screen *rs = virgl_screen(vctx->base.screen); + struct pipe_box box; + + assert(vbuf->on_list); + + box.height = 1; + box.depth = 1; + box.y = 0; + box.z = 0; + + box.x = vbuf->valid_buffer_range.start; + box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0); + + vctx->num_transfers++; + rs->vws->transfer_put(rs->vws, vbuf->base.hw_res, + &box, 0, 0, box.x, 0); + + util_range_set_empty(&vbuf->valid_buffer_range); +} + +static void virgl_attach_res_framebuffer(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct pipe_surface *surf; + struct virgl_resource *res; + unsigned i; + + surf = vctx->framebuffer.zsbuf; + if (surf) { + res = virgl_resource(surf->texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } 
+ for (i = 0; i < vctx->framebuffer.nr_cbufs; i++) { + surf = vctx->framebuffer.cbufs[i]; + if (surf) { + res = virgl_resource(surf->texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } + } +} + +static void virgl_attach_res_sampler_views(struct virgl_context *vctx, + unsigned shader_type) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_textures_info *tinfo = &vctx->samplers[shader_type]; + struct virgl_resource *res; + uint32_t remaining_mask = tinfo->enabled_mask; + unsigned i; + while (remaining_mask) { + i = u_bit_scan(&remaining_mask); + assert(tinfo->views[i]); + + res = virgl_resource(tinfo->views[i]->base.texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_vertex_buffers(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + unsigned i; + + for (i = 0; i < vctx->num_vertex_buffers; i++) { + res = virgl_resource(vctx->vertex_buffer[i].buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_index_buffer(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + + res = virgl_resource(vctx->index_buffer.buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); +} + +static void virgl_attach_res_so_targets(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + unsigned i; + + for (i = 0; i < vctx->num_so_targets; i++) { + res = virgl_resource(vctx->so_targets[i].base.buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx, + unsigned shader_type) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + 
unsigned i; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + res = virgl_resource(vctx->ubos[shader_type][i]); + if (res) { + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } + } +} + +/* + * after flushing, the hw context still has a bunch of + * resources bound, so we need to rebind those here. + */ +static void virgl_reemit_res(struct virgl_context *vctx) +{ + unsigned shader_type; + + /* reattach any flushed resources */ + /* framebuffer, sampler views, vertex/index/uniform/stream buffers */ + virgl_attach_res_framebuffer(vctx); + + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + virgl_attach_res_sampler_views(vctx, shader_type); + virgl_attach_res_uniform_buffers(vctx, shader_type); + } + virgl_attach_res_index_buffer(vctx); + virgl_attach_res_vertex_buffers(vctx); + virgl_attach_res_so_targets(vctx); +} + +static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx, + struct pipe_resource *resource, + const struct pipe_surface *templ) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_surface *surf; + struct virgl_resource *res = virgl_resource(resource); + uint32_t handle; + + surf = CALLOC_STRUCT(virgl_surface); + if (surf == NULL) + return NULL; + + res->clean = FALSE; + handle = virgl_object_assign_handle(); + pipe_reference_init(&surf->base.reference, 1); + pipe_resource_reference(&surf->base.texture, resource); + surf->base.context = ctx; + surf->base.format = templ->format; + if (resource->target != PIPE_BUFFER) { + surf->base.width = u_minify(resource->width0, templ->u.tex.level); + surf->base.height = u_minify(resource->height0, templ->u.tex.level); + surf->base.u.tex.level = templ->u.tex.level; + surf->base.u.tex.first_layer = templ->u.tex.first_layer; + surf->base.u.tex.last_layer = templ->u.tex.last_layer; + } else { + surf->base.width = templ->u.buf.last_element - templ->u.buf.first_element + 1; + surf->base.height = resource->height0; + surf->base.u.buf.first_element = 
templ->u.buf.first_element; + surf->base.u.buf.last_element = templ->u.buf.last_element; + } + virgl_encoder_create_surface(vctx, handle, res, &surf->base); + surf->handle = handle; + return &surf->base; +} + +static void virgl_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *psurf) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_surface *surf = virgl_surface(psurf); + + pipe_resource_reference(&surf->base.texture, NULL); + virgl_encode_delete_object(vctx, surf->handle, VIRGL_OBJECT_SURFACE); + FREE(surf); +} + +static void *virgl_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_blend_state(vctx, handle, blend_state); + return (void *)(unsigned long)handle; + +} + +static void virgl_bind_blend_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_BLEND); +} + +static void virgl_delete_blend_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_BLEND); +} + +static void *virgl_create_depth_stencil_alpha_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_dsa_state(vctx, handle, blend_state); + return (void *)(unsigned long)handle; +} + +static void virgl_bind_depth_stencil_alpha_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_bind_object(vctx, handle, 
VIRGL_OBJECT_DSA); +} + +static void virgl_delete_depth_stencil_alpha_state(struct pipe_context *ctx, + void *dsa_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)dsa_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_DSA); +} + +static void *virgl_create_rasterizer_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_rasterizer_state(vctx, handle, rs_state); + return (void *)(unsigned long)handle; +} + +static void virgl_bind_rasterizer_state(struct pipe_context *ctx, + void *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)rs_state; + + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); +} + +static void virgl_delete_rasterizer_state(struct pipe_context *ctx, + void *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)rs_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); +} + +static void virgl_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + + vctx->framebuffer = *state; + virgl_encoder_set_framebuffer_state(vctx, state); + virgl_attach_res_framebuffer(vctx); +} + +static void virgl_set_viewport_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_viewport_states(vctx, start_slot, num_viewports, state); +} + +static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = virgl_object_assign_handle(); + 
virgl_encoder_create_vertex_elements(vctx, handle, + num_elements, elements); + return (void*)(unsigned long)handle; + +} + +static void virgl_delete_vertex_elements_state(struct pipe_context *ctx, + void *ve) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ve; + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); +} + +static void virgl_bind_vertex_elements_state(struct pipe_context *ctx, + void *ve) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ve; + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); +} + +static void virgl_set_vertex_buffers(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct virgl_context *vctx = virgl_context(ctx); + + util_set_vertex_buffers_count(vctx->vertex_buffer, + &vctx->num_vertex_buffers, + buffers, start_slot, num_buffers); + + vctx->vertex_array_dirty = TRUE; +} + +static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (vctx->vertex_array_dirty) { + virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + virgl_attach_res_vertex_buffers(vctx); + } +} + +static void virgl_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *ref) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_stencil_ref(vctx, ref); +} + +static void virgl_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *color) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_blend_color(vctx, color); +} + +static void virgl_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (ib) { + pipe_resource_reference(&vctx->index_buffer.buffer, ib->buffer); + memcpy(&vctx->index_buffer, ib, sizeof(*ib)); + 
} else { + pipe_resource_reference(&vctx->index_buffer.buffer, NULL); + } +} + +static void virgl_hw_set_index_buffer(struct pipe_context *ctx, + struct pipe_index_buffer *ib) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_index_buffer(vctx, ib); + virgl_attach_res_index_buffer(vctx); +} + +static void virgl_set_constant_buffer(struct pipe_context *ctx, + uint shader, uint index, + struct pipe_constant_buffer *buf) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (buf) { + if (!buf->user_buffer){ + struct virgl_resource *res = virgl_resource(buf->buffer); + virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset, + buf->buffer_size, res); + pipe_resource_reference(&vctx->ubos[shader][index], buf->buffer); + return; + } + pipe_resource_reference(&vctx->ubos[shader][index], NULL); + virgl_encoder_write_constant_buffer(vctx, shader, index, buf->buffer_size / 4, buf->user_buffer); + } else { + virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL); + pipe_resource_reference(&vctx->ubos[shader][index], NULL); + } +} + +void virgl_transfer_inline_write(struct pipe_context *ctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *vs = virgl_screen(ctx->screen); + struct virgl_resource *grres = virgl_resource(res); + struct virgl_buffer *vbuf = virgl_buffer(res); + + grres->clean = FALSE; + + if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) { + ctx->flush(ctx, NULL, 0); + + vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + } + + virgl_encoder_inline_write(vctx, grres, level, usage, + box, data, stride, layer_stride); +} + +static void *virgl_shader_encoder(struct pipe_context *ctx, + const struct pipe_shader_state *shader, + unsigned type) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + 
struct tgsi_token *new_tokens; + int ret; + + new_tokens = virgl_tgsi_transform(shader->tokens); + if (!new_tokens) + return NULL; + + handle = virgl_object_assign_handle(); + /* encode VS state */ + ret = virgl_encode_shader_state(vctx, handle, type, + &shader->stream_output, + new_tokens); + if (ret) { + return NULL; + } + + FREE(new_tokens); + return (void *)(unsigned long)handle; + +} +static void *virgl_create_vs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX); +} + +static void *virgl_create_gs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_GEOMETRY); +} + +static void *virgl_create_fs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_FRAGMENT); +} + +static void +virgl_delete_fs_state(struct pipe_context *ctx, + void *fs) +{ + uint32_t handle = (unsigned long)fs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void +virgl_delete_gs_state(struct pipe_context *ctx, + void *gs) +{ + uint32_t handle = (unsigned long)gs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void +virgl_delete_vs_state(struct pipe_context *ctx, + void *vs) +{ + uint32_t handle = (unsigned long)vs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void virgl_bind_vs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_VERTEX); +} + +static void virgl_bind_gs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct 
virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_GEOMETRY); +} + + +static void virgl_bind_fs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_FRAGMENT); +} + +static void virgl_clear(struct pipe_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_clear(vctx, buffers, color, depth, stencil); +} + +static void virgl_draw_vbo(struct pipe_context *ctx, + const struct pipe_draw_info *dinfo) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + struct pipe_index_buffer ib = {}; + struct pipe_draw_info info = *dinfo; + + if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) { + util_primconvert_save_index_buffer(vctx->primconvert, &vctx->index_buffer); + util_primconvert_draw_vbo(vctx->primconvert, dinfo); + return; + } + if (info.indexed) { + pipe_resource_reference(&ib.buffer, vctx->index_buffer.buffer); + ib.user_buffer = vctx->index_buffer.user_buffer; + ib.index_size = vctx->index_buffer.index_size; + ib.offset = vctx->index_buffer.offset + info.start * ib.index_size; + + if (ib.user_buffer) { + u_upload_data(vctx->uploader, 0, info.count * ib.index_size, + ib.user_buffer, &ib.offset, &ib.buffer); + ib.user_buffer = NULL; + } + } + + u_upload_unmap(vctx->uploader); + + vctx->num_draws++; + virgl_hw_set_vertex_buffers(ctx); + if (info.indexed) + virgl_hw_set_index_buffer(ctx, &ib); + + virgl_encoder_draw_vbo(vctx, &info); + + pipe_resource_reference(&ib.buffer, NULL); + +} + +static void virgl_flush_eq(struct virgl_context *ctx, void *closure) +{ + struct virgl_screen *rs = virgl_screen(ctx->base.screen); + + /* send the buffer to the remote side for decoding */ + ctx->num_transfers = 
ctx->num_draws = 0; + rs->vws->submit_cmd(rs->vws, ctx->cbuf); + + virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id); + + /* add back current framebuffer resources to reference list? */ + virgl_reemit_res(ctx); +} + +static void virgl_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + enum pipe_flush_flags flags) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + struct virgl_buffer *buf, *tmp; + + if (fence) + *fence = rs->vws->cs_create_fence(rs->vws); + + LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) { + struct pipe_resource *res = &buf->base.u.b; + virgl_buffer_flush(vctx, buf); + list_del(&buf->flush_list); + buf->on_list = FALSE; + pipe_resource_reference(&res, NULL); + + } + virgl_flush_eq(vctx, vctx); +} + +static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_sampler_view *grview; + uint32_t handle; + struct virgl_resource *res; + + if (state == NULL) + return NULL; + + grview = CALLOC_STRUCT(virgl_sampler_view); + if (!grview) + return NULL; + + res = virgl_resource(texture); + handle = virgl_object_assign_handle(); + virgl_encode_sampler_view(vctx, handle, res, state); + + grview->base = *state; + grview->base.reference.count = 1; + + grview->base.texture = NULL; + grview->base.context = ctx; + pipe_resource_reference(&grview->base.texture, texture); + grview->handle = handle; + return &grview->base; +} + +static void virgl_set_sampler_views(struct pipe_context *ctx, + unsigned shader_type, + unsigned start_slot, + unsigned num_views, + struct pipe_sampler_view **views) +{ + struct virgl_context *vctx = virgl_context(ctx); + int i; + uint32_t disable_mask = ~((1ull << num_views) - 1); + struct virgl_textures_info *tinfo = &vctx->samplers[shader_type]; + uint32_t new_mask = 
0; + uint32_t remaining_mask; + + remaining_mask = tinfo->enabled_mask & disable_mask; + + while (remaining_mask) { + i = u_bit_scan(&remaining_mask); + assert(tinfo->views[i]); + + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL); + } + + for (i = 0; i < num_views; i++) { + struct virgl_sampler_view *grview = virgl_sampler_view(views[i]); + + if (views[i] == (struct pipe_sampler_view *)tinfo->views[i]) + continue; + + if (grview) { + new_mask |= 1 << i; + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]); + } else { + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL); + disable_mask |= 1 << i; + } + } + + tinfo->enabled_mask &= ~disable_mask; + tinfo->enabled_mask |= new_mask; + virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views); + virgl_attach_res_sampler_views(vctx, shader_type); +} + +static void virgl_destroy_sampler_view(struct pipe_context *ctx, + struct pipe_sampler_view *view) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_sampler_view *grview = virgl_sampler_view(view); + + virgl_encode_delete_object(vctx, grview->handle, VIRGL_OBJECT_SAMPLER_VIEW); + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void *virgl_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + + handle = virgl_object_assign_handle(); + + virgl_encode_sampler_state(vctx, handle, state); + return (void *)(unsigned long)handle; +} + +static void virgl_delete_sampler_state(struct pipe_context *ctx, + void *ss) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ss; + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SAMPLER_STATE); +} + +static void virgl_bind_sampler_states(struct pipe_context *ctx, + unsigned shader, unsigned start_slot, + unsigned 
num_samplers, + void **samplers) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handles[32]; + int i; + for (i = 0; i < num_samplers; i++) { + handles[i] = (unsigned long)(samplers[i]); + } + virgl_encode_bind_sampler_states(vctx, shader, start_slot, num_samplers, handles); +} + +static void virgl_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *ps) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_polygon_stipple(vctx, ps); +} + +static void virgl_set_scissor_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_scissor, + const struct pipe_scissor_state *ss) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_scissor_state(vctx, start_slot, num_scissor, ss); +} + +static void virgl_set_sample_mask(struct pipe_context *ctx, + unsigned sample_mask) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_sample_mask(vctx, sample_mask); +} + +static void virgl_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *clip) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_clip_state(vctx, clip); +} + +static void virgl_resource_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_resource *dres = virgl_resource(dst); + struct virgl_resource *sres = virgl_resource(src); + + dres->clean = FALSE; + virgl_encode_resource_copy_region(vctx, dres, + dst_level, dstx, dsty, dstz, + sres, src_level, + src_box); +} + +static void +virgl_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource) +{ +} + +static void virgl_blit(struct pipe_context *ctx, + const struct pipe_blit_info *blit) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct 
virgl_resource *dres = virgl_resource(blit->dst.resource); + struct virgl_resource *sres = virgl_resource(blit->src.resource); + + dres->clean = FALSE; + virgl_encode_blit(vctx, dres, sres, + blit); +} + +static void +virgl_context_destroy( struct pipe_context *ctx ) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + vctx->framebuffer.zsbuf = NULL; + vctx->framebuffer.nr_cbufs = 0; + virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id); + virgl_flush_eq(vctx, vctx); + + rs->vws->cmd_buf_destroy(vctx->cbuf); + if (vctx->uploader) + u_upload_destroy(vctx->uploader); + util_primconvert_destroy(vctx->primconvert); + + util_slab_destroy(&vctx->texture_transfer_pool); + FREE(vctx); +} + +struct pipe_context *virgl_context_create(struct pipe_screen *pscreen, + void *priv, + unsigned flags) +{ + struct virgl_context *vctx; + struct virgl_screen *rs = virgl_screen(pscreen); + vctx = CALLOC_STRUCT(virgl_context); + + vctx->cbuf = rs->vws->cmd_buf_create(rs->vws); + if (!vctx->cbuf) { + FREE(vctx); + return NULL; + } + + vctx->base.destroy = virgl_context_destroy; + vctx->base.create_surface = virgl_create_surface; + vctx->base.surface_destroy = virgl_surface_destroy; + vctx->base.set_framebuffer_state = virgl_set_framebuffer_state; + vctx->base.create_blend_state = virgl_create_blend_state; + vctx->base.bind_blend_state = virgl_bind_blend_state; + vctx->base.delete_blend_state = virgl_delete_blend_state; + vctx->base.create_depth_stencil_alpha_state = virgl_create_depth_stencil_alpha_state; + vctx->base.bind_depth_stencil_alpha_state = virgl_bind_depth_stencil_alpha_state; + vctx->base.delete_depth_stencil_alpha_state = virgl_delete_depth_stencil_alpha_state; + vctx->base.create_rasterizer_state = virgl_create_rasterizer_state; + vctx->base.bind_rasterizer_state = virgl_bind_rasterizer_state; + vctx->base.delete_rasterizer_state = virgl_delete_rasterizer_state; + + vctx->base.set_viewport_states = 
virgl_set_viewport_states; + vctx->base.create_vertex_elements_state = virgl_create_vertex_elements_state; + vctx->base.bind_vertex_elements_state = virgl_bind_vertex_elements_state; + vctx->base.delete_vertex_elements_state = virgl_delete_vertex_elements_state; + vctx->base.set_vertex_buffers = virgl_set_vertex_buffers; + vctx->base.set_index_buffer = virgl_set_index_buffer; + vctx->base.set_constant_buffer = virgl_set_constant_buffer; + vctx->base.transfer_inline_write = virgl_transfer_inline_write; + + vctx->base.create_vs_state = virgl_create_vs_state; + vctx->base.create_gs_state = virgl_create_gs_state; + vctx->base.create_fs_state = virgl_create_fs_state; + + vctx->base.bind_vs_state = virgl_bind_vs_state; + vctx->base.bind_gs_state = virgl_bind_gs_state; + vctx->base.bind_fs_state = virgl_bind_fs_state; + + vctx->base.delete_vs_state = virgl_delete_vs_state; + vctx->base.delete_gs_state = virgl_delete_gs_state; + vctx->base.delete_fs_state = virgl_delete_fs_state; + + vctx->base.clear = virgl_clear; + vctx->base.draw_vbo = virgl_draw_vbo; + vctx->base.flush = virgl_flush_from_st; + vctx->base.screen = pscreen; + vctx->base.create_sampler_view = virgl_create_sampler_view; + vctx->base.sampler_view_destroy = virgl_destroy_sampler_view; + vctx->base.set_sampler_views = virgl_set_sampler_views; + + vctx->base.create_sampler_state = virgl_create_sampler_state; + vctx->base.delete_sampler_state = virgl_delete_sampler_state; + vctx->base.bind_sampler_states = virgl_bind_sampler_states; + + vctx->base.set_polygon_stipple = virgl_set_polygon_stipple; + vctx->base.set_scissor_states = virgl_set_scissor_states; + vctx->base.set_sample_mask = virgl_set_sample_mask; + vctx->base.set_stencil_ref = virgl_set_stencil_ref; + vctx->base.set_clip_state = virgl_set_clip_state; + + vctx->base.set_blend_color = virgl_set_blend_color; + + vctx->base.resource_copy_region = virgl_resource_copy_region; + vctx->base.flush_resource = virgl_flush_resource; + vctx->base.blit = 
virgl_blit; + + virgl_init_context_resource_functions(&vctx->base); + virgl_init_query_functions(vctx); + virgl_init_so_functions(vctx); + + list_inithead(&vctx->to_flush_bufs); + util_slab_create(&vctx->texture_transfer_pool, sizeof(struct virgl_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + + vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask); + vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, 256, + PIPE_BIND_INDEX_BUFFER); + if (!vctx->uploader) + goto fail; + + vctx->hw_sub_ctx_id = rs->sub_ctx_id++; + virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id); + + virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id); + return &vctx->base; +fail: + return NULL; +} diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h new file mode 100644 index 00000000000..adb8adef33c --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_context.h @@ -0,0 +1,115 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_CONTEXT_H +#define VIRGL_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "util/u_slab.h" +#include "util/list.h" + +struct pipe_screen; +struct tgsi_token; +struct u_upload_mgr; +struct virgl_cmd_buf; + +struct virgl_sampler_view { + struct pipe_sampler_view base; + uint32_t handle; +}; + +struct virgl_so_target { + struct pipe_stream_output_target base; + uint32_t handle; +}; + +struct virgl_textures_info { + struct virgl_sampler_view *views[16]; + uint32_t enabled_mask; +}; + +struct virgl_context { + struct pipe_context base; + struct virgl_cmd_buf *cbuf; + + struct virgl_textures_info samplers[PIPE_SHADER_TYPES]; + + struct pipe_framebuffer_state framebuffer; + + struct util_slab_mempool texture_transfer_pool; + + struct pipe_index_buffer index_buffer; + struct u_upload_mgr *uploader; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_buffers; + boolean vertex_array_dirty; + + struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_so_targets; + + struct pipe_resource *ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + int num_transfers; + int num_draws; + struct list_head to_flush_bufs; + + struct primconvert_context *primconvert; + uint32_t hw_sub_ctx_id; +}; + +static inline struct virgl_sampler_view * +virgl_sampler_view(struct pipe_sampler_view *view) +{ + return (struct virgl_sampler_view *)view; +}; + +static inline struct virgl_so_target * +virgl_so_target(struct pipe_stream_output_target *target) +{ + return (struct virgl_so_target *)target; +} + +static inline struct virgl_context *virgl_context(struct pipe_context *ctx) +{ + return (struct virgl_context *)ctx; +} + +struct 
pipe_context *virgl_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); + +void virgl_init_blit_functions(struct virgl_context *vctx); +void virgl_init_query_functions(struct virgl_context *vctx); +void virgl_init_so_functions(struct virgl_context *vctx); + +void virgl_transfer_inline_write(struct pipe_context *ctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride); + +struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in); + +#endif diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c new file mode 100644 index 00000000000..22fb5292819 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -0,0 +1,867 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include <stdint.h> + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "virgl_context.h" +#include "virgl_encode.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static int virgl_encoder_write_cmd_dword(struct virgl_context *ctx, + uint32_t dword) +{ + int len = (dword >> 16); + + if ((ctx->cbuf->cdw + len + 1) > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + virgl_encoder_write_dword(ctx->cbuf, dword); + return 0; +} + +static void virgl_encoder_write_res(struct virgl_context *ctx, + struct virgl_resource *res) +{ + struct virgl_winsys *vws = virgl_screen(ctx->base.screen)->vws; + + if (res && res->hw_res) + vws->emit_res(vws, ctx->cbuf, res->hw_res, TRUE); + else { + virgl_encoder_write_dword(ctx->cbuf, 0); + } +} + +int virgl_encode_bind_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_OBJECT, object, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encode_delete_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_OBJECT, object, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encode_blend_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_blend_state *blend_state) +{ + uint32_t tmp; + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_BLEND, VIRGL_OBJ_BLEND_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = + VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(blend_state->independent_blend_enable) | + VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(blend_state->logicop_enable) | + VIRGL_OBJ_BLEND_S0_DITHER(blend_state->dither) | + 
VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(blend_state->alpha_to_coverage) | + VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(blend_state->alpha_to_one); + + virgl_encoder_write_dword(ctx->cbuf, tmp); + + tmp = VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(blend_state->logicop_func); + virgl_encoder_write_dword(ctx->cbuf, tmp); + + for (i = 0; i < VIRGL_MAX_COLOR_BUFS; i++) { + tmp = + VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(blend_state->rt[i].blend_enable) | + VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(blend_state->rt[i].rgb_func) | + VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(blend_state->rt[i].rgb_src_factor) | + VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(blend_state->rt[i].rgb_dst_factor)| + VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(blend_state->rt[i].alpha_func) | + VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(blend_state->rt[i].alpha_src_factor) | + VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(blend_state->rt[i].alpha_dst_factor) | + VIRGL_OBJ_BLEND_S2_RT_COLORMASK(blend_state->rt[i].colormask); + virgl_encoder_write_dword(ctx->cbuf, tmp); + } + return 0; +} + +int virgl_encode_dsa_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_depth_stencil_alpha_state *dsa_state) +{ + uint32_t tmp; + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_DSA, VIRGL_OBJ_DSA_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(dsa_state->depth.enabled) | + VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(dsa_state->depth.writemask) | + VIRGL_OBJ_DSA_S0_DEPTH_FUNC(dsa_state->depth.func) | + VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(dsa_state->alpha.enabled) | + VIRGL_OBJ_DSA_S0_ALPHA_FUNC(dsa_state->alpha.func); + virgl_encoder_write_dword(ctx->cbuf, tmp); + + for (i = 0; i < 2; i++) { + tmp = VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(dsa_state->stencil[i].enabled) | + VIRGL_OBJ_DSA_S1_STENCIL_FUNC(dsa_state->stencil[i].func) | + VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(dsa_state->stencil[i].fail_op) | + VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(dsa_state->stencil[i].zpass_op) | + 
VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(dsa_state->stencil[i].zfail_op) | + VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(dsa_state->stencil[i].valuemask) | + VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(dsa_state->stencil[i].writemask); + virgl_encoder_write_dword(ctx->cbuf, tmp); + } + + virgl_encoder_write_dword(ctx->cbuf, fui(dsa_state->alpha.ref_value)); + return 0; +} +int virgl_encode_rasterizer_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_rasterizer_state *state) +{ + uint32_t tmp; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_RASTERIZER, VIRGL_OBJ_RS_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_RS_S0_FLATSHADE(state->flatshade) | + VIRGL_OBJ_RS_S0_DEPTH_CLIP(state->depth_clip) | + VIRGL_OBJ_RS_S0_CLIP_HALFZ(state->clip_halfz) | + VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(state->rasterizer_discard) | + VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(state->flatshade_first) | + VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(state->light_twoside) | + VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(state->sprite_coord_mode) | + VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(state->point_quad_rasterization) | + VIRGL_OBJ_RS_S0_CULL_FACE(state->cull_face) | + VIRGL_OBJ_RS_S0_FILL_FRONT(state->fill_front) | + VIRGL_OBJ_RS_S0_FILL_BACK(state->fill_back) | + VIRGL_OBJ_RS_S0_SCISSOR(state->scissor) | + VIRGL_OBJ_RS_S0_FRONT_CCW(state->front_ccw) | + VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(state->clamp_vertex_color) | + VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(state->clamp_fragment_color) | + VIRGL_OBJ_RS_S0_OFFSET_LINE(state->offset_line) | + VIRGL_OBJ_RS_S0_OFFSET_POINT(state->offset_point) | + VIRGL_OBJ_RS_S0_OFFSET_TRI(state->offset_tri) | + VIRGL_OBJ_RS_S0_POLY_SMOOTH(state->poly_smooth) | + VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(state->poly_stipple_enable) | + VIRGL_OBJ_RS_S0_POINT_SMOOTH(state->point_smooth) | + VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(state->point_size_per_vertex) | + VIRGL_OBJ_RS_S0_MULTISAMPLE(state->multisample) | + 
VIRGL_OBJ_RS_S0_LINE_SMOOTH(state->line_smooth) | + VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | + VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(state->line_last_pixel) | + VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(state->half_pixel_center) | + VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(state->bottom_edge_rule); + + virgl_encoder_write_dword(ctx->cbuf, tmp); /* S0 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->point_size)); /* S1 */ + virgl_encoder_write_dword(ctx->cbuf, state->sprite_coord_enable); /* S2 */ + tmp = VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(state->line_stipple_pattern) | + VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(state->line_stipple_factor) | + VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(state->clip_plane_enable); + virgl_encoder_write_dword(ctx->cbuf, tmp); /* S3 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->line_width)); /* S4 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_units)); /* S5 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_scale)); /* S6 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_clamp)); /* S7 */ + return 0; +} + +static void virgl_emit_shader_header(struct virgl_context *ctx, + uint32_t handle, uint32_t len, + uint32_t type, uint32_t offlen, + uint32_t num_tokens) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SHADER, len)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, type); + virgl_encoder_write_dword(ctx->cbuf, offlen); + virgl_encoder_write_dword(ctx->cbuf, num_tokens); +} + +static void virgl_emit_shader_streamout(struct virgl_context *ctx, + const struct pipe_stream_output_info *so_info) +{ + int num_outputs = 0; + int i; + uint32_t tmp; + + if (so_info) + num_outputs = so_info->num_outputs; + + virgl_encoder_write_dword(ctx->cbuf, num_outputs); + if (num_outputs) { + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, so_info->stride[i]); + + for (i = 0; i < so_info->num_outputs; i++) { + tmp = + 
VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(so_info->output[i].register_index) | + VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(so_info->output[i].start_component) | + VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(so_info->output[i].num_components) | + VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(so_info->output[i].output_buffer) | + VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(so_info->output[i].dst_offset); + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, 0); + } + } +} + +int virgl_encode_shader_state(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_stream_output_info *so_info, + const struct tgsi_token *tokens) +{ + char *str, *sptr; + uint32_t shader_len, len; + bool bret; + int num_tokens = tgsi_num_tokens(tokens); + int str_total_size = 65536; + int retry_size = 1; + uint32_t left_bytes, base_hdr_size, strm_hdr_size, thispass; + bool first_pass; + str = CALLOC(1, str_total_size); + if (!str) + return -1; + + do { + int old_size; + + bret = tgsi_dump_str(tokens, TGSI_DUMP_FLOAT_AS_HEX, str, str_total_size); + if (bret == false) { + fprintf(stderr, "Failed to translate shader in available space - trying again\n"); + old_size = str_total_size; + str_total_size = 65536 * ++retry_size; + str = REALLOC(str, old_size, str_total_size); + if (!str) + return -1; + } + } while (bret == false && retry_size < 10); + + if (bret == false) + return -1; + + shader_len = strlen(str) + 1; + + left_bytes = shader_len; + + base_hdr_size = 5; + strm_hdr_size = so_info->num_outputs ? so_info->num_outputs * 2 + 4 : 0; + first_pass = true; + sptr = str; + while (left_bytes) { + uint32_t length, offlen; + int hdr_len = base_hdr_size + (first_pass ? 
strm_hdr_size : 0); + if (ctx->cbuf->cdw + hdr_len + 1 > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4; + + length = MIN2(thispass, left_bytes); + len = ((length + 3) / 4) + hdr_len; + + if (first_pass) + offlen = VIRGL_OBJ_SHADER_OFFSET_VAL(shader_len); + else + offlen = VIRGL_OBJ_SHADER_OFFSET_VAL((uintptr_t)sptr - (uintptr_t)str) | VIRGL_OBJ_SHADER_OFFSET_CONT; + + virgl_emit_shader_header(ctx, handle, len, type, offlen, num_tokens); + + virgl_emit_shader_streamout(ctx, first_pass ? so_info : NULL); + + virgl_encoder_write_block(ctx->cbuf, (uint8_t *)sptr, length); + + sptr += length; + first_pass = false; + left_bytes -= length; + } + + FREE(str); + return 0; +} + + +int virgl_encode_clear(struct virgl_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CLEAR, 0, VIRGL_OBJ_CLEAR_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, buffers); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, color->ui[i]); + virgl_encoder_write_qword(ctx->cbuf, *(uint64_t *)&depth); + virgl_encoder_write_dword(ctx->cbuf, stencil); + return 0; +} + +int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct virgl_surface *zsurf = virgl_surface(state->zsbuf); + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_FRAMEBUFFER_STATE, 0, VIRGL_SET_FRAMEBUFFER_STATE_SIZE(state->nr_cbufs))); + virgl_encoder_write_dword(ctx->cbuf, state->nr_cbufs); + virgl_encoder_write_dword(ctx->cbuf, zsurf ? zsurf->handle : 0); + for (i = 0; i < state->nr_cbufs; i++) { + struct virgl_surface *surf = virgl_surface(state->cbufs[i]); + virgl_encoder_write_dword(ctx->cbuf, surf ? 
surf->handle : 0); + } + + return 0; +} + +int virgl_encoder_set_viewport_states(struct virgl_context *ctx, + int start_slot, + int num_viewports, + const struct pipe_viewport_state *states) +{ + int i,v; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VIEWPORT_STATE, 0, VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports))); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (v = 0; v < num_viewports; v++) { + for (i = 0; i < 3; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(states[v].scale[i])); + for (i = 0; i < 3; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(states[v].translate[i])); + } + return 0; +} + +int virgl_encoder_create_vertex_elements(struct virgl_context *ctx, + uint32_t handle, + unsigned num_elements, + const struct pipe_vertex_element *element) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_VERTEX_ELEMENTS, VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements))); + virgl_encoder_write_dword(ctx->cbuf, handle); + for (i = 0; i < num_elements; i++) { + virgl_encoder_write_dword(ctx->cbuf, element[i].src_offset); + virgl_encoder_write_dword(ctx->cbuf, element[i].instance_divisor); + virgl_encoder_write_dword(ctx->cbuf, element[i].vertex_buffer_index); + virgl_encoder_write_dword(ctx->cbuf, element[i].src_format); + } + return 0; +} + +int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VERTEX_BUFFERS, 0, VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers))); + for (i = 0; i < num_buffers; i++) { + struct virgl_resource *res = virgl_resource(buffers[i].buffer); + virgl_encoder_write_dword(ctx->cbuf, buffers[i].stride); + virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); + virgl_encoder_write_res(ctx, res); + } + return 0; +} + +int virgl_encoder_set_index_buffer(struct virgl_context *ctx, + const struct pipe_index_buffer *ib) +{ + 
int length = VIRGL_SET_INDEX_BUFFER_SIZE(ib); + struct virgl_resource *res = NULL; + if (ib) + res = virgl_resource(ib->buffer); + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_INDEX_BUFFER, 0, length)); + virgl_encoder_write_res(ctx, res); + if (ib) { + virgl_encoder_write_dword(ctx->cbuf, ib->index_size); + virgl_encoder_write_dword(ctx->cbuf, ib->offset); + } + return 0; +} + +int virgl_encoder_draw_vbo(struct virgl_context *ctx, + const struct pipe_draw_info *info) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DRAW_VBO, 0, VIRGL_DRAW_VBO_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, info->start); + virgl_encoder_write_dword(ctx->cbuf, info->count); + virgl_encoder_write_dword(ctx->cbuf, info->mode); + virgl_encoder_write_dword(ctx->cbuf, info->indexed); + virgl_encoder_write_dword(ctx->cbuf, info->instance_count); + virgl_encoder_write_dword(ctx->cbuf, info->index_bias); + virgl_encoder_write_dword(ctx->cbuf, info->start_instance); + virgl_encoder_write_dword(ctx->cbuf, info->primitive_restart); + virgl_encoder_write_dword(ctx->cbuf, info->restart_index); + virgl_encoder_write_dword(ctx->cbuf, info->min_index); + virgl_encoder_write_dword(ctx->cbuf, info->max_index); + if (info->count_from_stream_output) + virgl_encoder_write_dword(ctx->cbuf, info->count_from_stream_output->buffer_size); + else + virgl_encoder_write_dword(ctx->cbuf, 0); + return 0; +} + +int virgl_encoder_create_surface(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_surface *templat) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SURFACE, VIRGL_OBJ_SURFACE_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, templat->format); + if (templat->texture->target == PIPE_BUFFER) { + virgl_encoder_write_dword(ctx->cbuf, templat->u.buf.first_element); + virgl_encoder_write_dword(ctx->cbuf, 
templat->u.buf.last_element); + + } else { + virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.level); + virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.first_layer | (templat->u.tex.last_layer << 16)); + } + return 0; +} + +int virgl_encoder_create_so_target(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + unsigned buffer_offset, + unsigned buffer_size) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_STREAMOUT_TARGET, VIRGL_OBJ_STREAMOUT_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, buffer_offset); + virgl_encoder_write_dword(ctx->cbuf, buffer_size); + return 0; +} + +static void virgl_encoder_iw_emit_header_1d(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + unsigned stride, unsigned layer_stride) +{ + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, level); + virgl_encoder_write_dword(ctx->cbuf, usage); + virgl_encoder_write_dword(ctx->cbuf, stride); + virgl_encoder_write_dword(ctx->cbuf, layer_stride); + virgl_encoder_write_dword(ctx->cbuf, box->x); + virgl_encoder_write_dword(ctx->cbuf, box->y); + virgl_encoder_write_dword(ctx->cbuf, box->z); + virgl_encoder_write_dword(ctx->cbuf, box->width); + virgl_encoder_write_dword(ctx->cbuf, box->height); + virgl_encoder_write_dword(ctx->cbuf, box->depth); +} + +int virgl_encoder_inline_write(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + const void *data, unsigned stride, + unsigned layer_stride) +{ + uint32_t size = (stride ? 
stride : box->width) * box->height; + uint32_t length, thispass, left_bytes; + struct pipe_box mybox = *box; + + length = 11 + (size + 3) / 4; + if ((ctx->cbuf->cdw + length + 1) > VIRGL_MAX_CMDBUF_DWORDS) { + if (box->height > 1 || box->depth > 1) { + debug_printf("inline transfer failed due to multi dimensions and too large\n"); + assert(0); + } + } + + left_bytes = size; + while (left_bytes) { + if (ctx->cbuf->cdw + 12 > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - 12) * 4; + + length = MIN2(thispass, left_bytes); + + mybox.width = length; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_INLINE_WRITE, 0, ((length + 3) / 4) + 11)); + virgl_encoder_iw_emit_header_1d(ctx, res, level, usage, &mybox, stride, layer_stride); + virgl_encoder_write_block(ctx->cbuf, data, length); + left_bytes -= length; + mybox.x += length; + data += length; + } + return 0; +} + +int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx, + struct virgl_resource *res) +{ +// virgl_encoder_write_dword(ctx->cbuf, VIRGL_CMD0(VIRGL_CCMD_FLUSH_FRONTUBFFER, 0, 1)); +// virgl_encoder_write_dword(ctx->cbuf, res_handle); + return 0; +} + +int virgl_encode_sampler_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_sampler_state *state) +{ + uint32_t tmp; + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_STATE, VIRGL_OBJ_SAMPLER_STATE_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(state->wrap_s) | + VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(state->wrap_t) | + VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(state->wrap_r) | + VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(state->min_img_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(state->min_mip_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(state->mag_img_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(state->compare_mode) | + 
VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(state->compare_func); + + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, fui(state->lod_bias)); + virgl_encoder_write_dword(ctx->cbuf, fui(state->min_lod)); + virgl_encoder_write_dword(ctx->cbuf, fui(state->max_lod)); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, state->border_color.ui[i]); + return 0; +} + + +int virgl_encode_sampler_view(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_sampler_view *state) +{ + uint32_t tmp; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_VIEW, VIRGL_OBJ_SAMPLER_VIEW_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, state->format); + if (res->u.b.target == PIPE_BUFFER) { + virgl_encoder_write_dword(ctx->cbuf, state->u.buf.first_element); + virgl_encoder_write_dword(ctx->cbuf, state->u.buf.last_element); + } else { + virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16); + virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8); + } + tmp = VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(state->swizzle_r) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(state->swizzle_g) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(state->swizzle_b) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(state->swizzle_a); + virgl_encoder_write_dword(ctx->cbuf, tmp); + return 0; +} + +int virgl_encode_set_sampler_views(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_views, + struct virgl_sampler_view **views) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLER_VIEWS, 0, VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views))); + virgl_encoder_write_dword(ctx->cbuf, shader_type); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_views; i++) { + uint32_t handle = views[i] ? 
views[i]->handle : 0; + virgl_encoder_write_dword(ctx->cbuf, handle); + } + return 0; +} + +int virgl_encode_bind_sampler_states(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_handles, + uint32_t *handles) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SAMPLER_STATES, 0, VIRGL_BIND_SAMPLER_STATES(num_handles))); + virgl_encoder_write_dword(ctx->cbuf, shader_type); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_handles; i++) + virgl_encoder_write_dword(ctx->cbuf, handles[i]); + return 0; +} + +int virgl_encoder_write_constant_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t size, + const void *data) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CONSTANT_BUFFER, 0, size + 2)); + virgl_encoder_write_dword(ctx->cbuf, shader); + virgl_encoder_write_dword(ctx->cbuf, index); + if (data) + virgl_encoder_write_block(ctx->cbuf, data, size * 4); + return 0; +} + +int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t offset, + uint32_t length, + struct virgl_resource *res) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_UNIFORM_BUFFER, 0, VIRGL_SET_UNIFORM_BUFFER_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, shader); + virgl_encoder_write_dword(ctx->cbuf, index); + virgl_encoder_write_dword(ctx->cbuf, offset); + virgl_encoder_write_dword(ctx->cbuf, length); + virgl_encoder_write_res(ctx, res); + return 0; +} + + +int virgl_encoder_set_stencil_ref(struct virgl_context *ctx, + const struct pipe_stencil_ref *ref) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STENCIL_REF, 0, VIRGL_SET_STENCIL_REF_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, VIRGL_STENCIL_REF_VAL(ref->ref_value[0] , (ref->ref_value[1]))); + return 0; +} + +int virgl_encoder_set_blend_color(struct virgl_context *ctx, + const struct pipe_blend_color *color) +{ + int i; + 
virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_BLEND_COLOR, 0, VIRGL_SET_BLEND_COLOR_SIZE)); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(color->color[i])); + return 0; +} + +int virgl_encoder_set_scissor_state(struct virgl_context *ctx, + unsigned start_slot, + int num_scissors, + const struct pipe_scissor_state *ss) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SCISSOR_STATE, 0, VIRGL_SET_SCISSOR_STATE_SIZE(num_scissors))); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_scissors; i++) { + virgl_encoder_write_dword(ctx->cbuf, (ss[i].minx | ss[i].miny << 16)); + virgl_encoder_write_dword(ctx->cbuf, (ss[i].maxx | ss[i].maxy << 16)); + } + return 0; +} + +void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx, + const struct pipe_poly_stipple *ps) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_POLYGON_STIPPLE, 0, VIRGL_POLYGON_STIPPLE_SIZE)); + for (i = 0; i < VIRGL_POLYGON_STIPPLE_SIZE; i++) { + virgl_encoder_write_dword(ctx->cbuf, ps->stipple[i]); + } +} + +void virgl_encoder_set_sample_mask(struct virgl_context *ctx, + unsigned sample_mask) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLE_MASK, 0, VIRGL_SET_SAMPLE_MASK_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, sample_mask); +} + +void virgl_encoder_set_clip_state(struct virgl_context *ctx, + const struct pipe_clip_state *clip) +{ + int i, j; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CLIP_STATE, 0, VIRGL_SET_CLIP_STATE_SIZE)); + for (i = 0; i < VIRGL_MAX_CLIP_PLANES; i++) { + for (j = 0; j < 4; j++) { + virgl_encoder_write_dword(ctx->cbuf, fui(clip->ucp[i][j])); + } + } +} + +int virgl_encode_resource_copy_region(struct virgl_context *ctx, + struct virgl_resource *dst_res, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct virgl_resource *src_res, + unsigned src_level, + const struct pipe_box *src_box) +{ + 
virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_COPY_REGION, 0, VIRGL_CMD_RESOURCE_COPY_REGION_SIZE)); + virgl_encoder_write_res(ctx, dst_res); + virgl_encoder_write_dword(ctx->cbuf, dst_level); + virgl_encoder_write_dword(ctx->cbuf, dstx); + virgl_encoder_write_dword(ctx->cbuf, dsty); + virgl_encoder_write_dword(ctx->cbuf, dstz); + virgl_encoder_write_res(ctx, src_res); + virgl_encoder_write_dword(ctx->cbuf, src_level); + virgl_encoder_write_dword(ctx->cbuf, src_box->x); + virgl_encoder_write_dword(ctx->cbuf, src_box->y); + virgl_encoder_write_dword(ctx->cbuf, src_box->z); + virgl_encoder_write_dword(ctx->cbuf, src_box->width); + virgl_encoder_write_dword(ctx->cbuf, src_box->height); + virgl_encoder_write_dword(ctx->cbuf, src_box->depth); + return 0; +} + +int virgl_encode_blit(struct virgl_context *ctx, + struct virgl_resource *dst_res, + struct virgl_resource *src_res, + const struct pipe_blit_info *blit) +{ + uint32_t tmp; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BLIT, 0, VIRGL_CMD_BLIT_SIZE)); + tmp = VIRGL_CMD_BLIT_S0_MASK(blit->mask) | + VIRGL_CMD_BLIT_S0_FILTER(blit->filter) | + VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable); + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.minx | blit->scissor.miny << 16)); + virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.maxx | blit->scissor.maxy << 16)); + + virgl_encoder_write_res(ctx, dst_res); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.level); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.format); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.x); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.y); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.z); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.width); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.height); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.depth); + + virgl_encoder_write_res(ctx, src_res); + 
virgl_encoder_write_dword(ctx->cbuf, blit->src.level); + virgl_encoder_write_dword(ctx->cbuf, blit->src.format); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.x); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.y); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.z); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.width); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.height); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.depth); + return 0; +} + +int virgl_encoder_create_query(struct virgl_context *ctx, + uint32_t handle, + uint query_type, + uint query_index, + struct virgl_resource *res, + uint32_t offset) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_QUERY, VIRGL_OBJ_QUERY_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, ((query_type & 0xffff) | (query_index << 16))); + virgl_encoder_write_dword(ctx->cbuf, offset); + virgl_encoder_write_res(ctx, res); + return 0; +} + +int virgl_encoder_begin_query(struct virgl_context *ctx, + uint32_t handle) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BEGIN_QUERY, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encoder_end_query(struct virgl_context *ctx, + uint32_t handle) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_END_QUERY, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encoder_get_query_result(struct virgl_context *ctx, + uint32_t handle, boolean wait) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_GET_QUERY_RESULT, 0, 2)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, wait ? 
1 : 0); + return 0; +} + +int virgl_encoder_render_condition(struct virgl_context *ctx, + uint32_t handle, boolean condition, + uint mode) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_RENDER_CONDITION, 0, VIRGL_RENDER_CONDITION_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, condition); + virgl_encoder_write_dword(ctx->cbuf, mode); + return 0; +} + +int virgl_encoder_set_so_targets(struct virgl_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) +{ + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STREAMOUT_TARGETS, 0, num_targets + 1)); + virgl_encoder_write_dword(ctx->cbuf, append_bitmask); + for (i = 0; i < num_targets; i++) { + struct virgl_so_target *tg = virgl_so_target(targets[i]); + virgl_encoder_write_dword(ctx->cbuf, tg->handle); + } + return 0; +} + + +int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encode_bind_shader(struct virgl_context *ctx, + uint32_t handle, uint32_t type) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SHADER, 0, 2)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, type); + return 0; +} diff --git a/src/gallium/drivers/virgl/virgl_encode.h 
b/src/gallium/drivers/virgl/virgl_encode.h new file mode 100644 index 00000000000..030bcd6d16e --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_encode.h @@ -0,0 +1,247 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_ENCODE_H +#define VIRGL_ENCODE_H + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "virgl_winsys.h" + +struct tgsi_token; + +struct virgl_context; +struct virgl_resource; +struct virgl_sampler_view; + +struct virgl_surface { + struct pipe_surface base; + uint32_t handle; +}; + +/* Reinterpret a pipe_surface pointer as the virgl_surface that embeds it as its first member. */ +static inline struct virgl_surface *virgl_surface(struct pipe_surface *surf) +{ + return (struct virgl_surface *)surf; +} + +/* Store one dword at index cdw of the command buffer and advance cdw by one. No bounds check is done here. */ +static inline void virgl_encoder_write_dword(struct virgl_cmd_buf *state, + uint32_t dword) +{ + state->buf[state->cdw++] = dword; +} + +/* Copy a 64-bit value, in its native memory layout, into the command buffer and advance cdw by two. No bounds check is done here. */ +static inline void virgl_encoder_write_qword(struct virgl_cmd_buf *state, + uint64_t qword) +{ + memcpy(state->buf + state->cdw, &qword, sizeof(uint64_t)); + state->cdw += 2; +} + +/* + * Copy len bytes from ptr into the command buffer at dword index cdw and + * advance cdw by (len + 3) / 4. When len is not a multiple of 4, the unused + * tail bytes of the last dword are zeroed so the padding is deterministic. + * No bounds check is done here. + */ +static inline void virgl_encoder_write_block(struct virgl_cmd_buf *state, + const uint8_t *ptr, uint32_t len) +{ + int x; + memcpy(state->buf + state->cdw, ptr, len); + x = (len % 4); +// fprintf(stderr, "[%d] block %d x is %d\n", state->cdw, len, x); + if (x) { + uint8_t *mp = (uint8_t *)(state->buf + state->cdw); + mp += len; + /* x data bytes sit in the last dword; the remaining 4 - x bytes are padding */ + memset(mp, 0, 4 - x); + } + state->cdw += (len + 3) / 4; +} + +extern int virgl_encode_blend_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_blend_state *blend_state); +extern int virgl_encode_rasterizer_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_rasterizer_state *state); + +extern int virgl_encode_shader_state(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_stream_output_info *so_info, + const struct tgsi_token *tokens); + +int virgl_encode_stream_output_info(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_shader_state *shader); + +int virgl_encoder_set_so_targets(struct virgl_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask); + +int virgl_encoder_create_so_target(struct virgl_context *ctx, + uint32_t handle, + 
struct virgl_resource *res, + unsigned buffer_offset, + unsigned buffer_size); + +int virgl_encode_clear(struct virgl_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); + +int virgl_encode_bind_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object); +int virgl_encode_delete_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object); + +int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx, + const struct pipe_framebuffer_state *state); +int virgl_encoder_set_viewport_states(struct virgl_context *ctx, + int start_slot, + int num_viewports, + const struct pipe_viewport_state *states); + +int virgl_encoder_draw_vbo(struct virgl_context *ctx, + const struct pipe_draw_info *info); + + +int virgl_encoder_create_surface(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_surface *templat); + +int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx, + struct virgl_resource *res); + +int virgl_encoder_create_vertex_elements(struct virgl_context *ctx, + uint32_t handle, + unsigned num_elements, + const struct pipe_vertex_element *element); + +int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers); + + +int virgl_encoder_inline_write(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + const void *data, unsigned stride, + unsigned layer_stride); +int virgl_encode_sampler_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_sampler_state *state); +int virgl_encode_sampler_view(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_sampler_view *state); + +int virgl_encode_set_sampler_views(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_views, + struct virgl_sampler_view **views); + +int 
virgl_encode_bind_sampler_states(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_handles, + uint32_t *handles); + +int virgl_encoder_set_index_buffer(struct virgl_context *ctx, + const struct pipe_index_buffer *ib); + +uint32_t virgl_object_assign_handle(void); + +int virgl_encoder_write_constant_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t size, + const void *data); + +int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t offset, + uint32_t length, + struct virgl_resource *res); +int virgl_encode_dsa_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_depth_stencil_alpha_state *dsa_state); + +int virgl_encoder_set_stencil_ref(struct virgl_context *ctx, + const struct pipe_stencil_ref *ref); + +int virgl_encoder_set_blend_color(struct virgl_context *ctx, + const struct pipe_blend_color *color); + +int virgl_encoder_set_scissor_state(struct virgl_context *ctx, + unsigned start_slot, + int num_scissors, + const struct pipe_scissor_state *ss); + +void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx, + const struct pipe_poly_stipple *ps); + +void virgl_encoder_set_sample_mask(struct virgl_context *ctx, + unsigned sample_mask); + +void virgl_encoder_set_clip_state(struct virgl_context *ctx, + const struct pipe_clip_state *clip); + +int virgl_encode_resource_copy_region(struct virgl_context *ctx, + struct virgl_resource *dst_res, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct virgl_resource *src_res, + unsigned src_level, + const struct pipe_box *src_box); + +int virgl_encode_blit(struct virgl_context *ctx, + struct virgl_resource *dst_res, + struct virgl_resource *src_res, + const struct pipe_blit_info *blit); + +int virgl_encoder_create_query(struct virgl_context *ctx, + uint32_t handle, + uint query_type, + uint query_index, + struct virgl_resource *res, + uint32_t 
offset); + +int virgl_encoder_begin_query(struct virgl_context *ctx, + uint32_t handle); +int virgl_encoder_end_query(struct virgl_context *ctx, + uint32_t handle); +int virgl_encoder_get_query_result(struct virgl_context *ctx, + uint32_t handle, boolean wait); + +int virgl_encoder_render_condition(struct virgl_context *ctx, + uint32_t handle, boolean condition, + uint mode); + +int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); +int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); +int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); + +int virgl_encode_bind_shader(struct virgl_context *ctx, + uint32_t handle, uint32_t type); +#endif diff --git a/src/gallium/drivers/virgl/virgl_hw.h b/src/gallium/drivers/virgl/virgl_hw.h new file mode 100644 index 00000000000..e3c56db2ac6 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_hw.h @@ -0,0 +1,286 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_HW_H +#define VIRGL_HW_H + +struct virgl_box { + uint32_t x, y, z; + uint32_t w, h, d; +}; + +/* formats known by the HW device - based on gallium subset */ +enum virgl_formats { + VIRGL_FORMAT_B8G8R8A8_UNORM = 1, + VIRGL_FORMAT_B8G8R8X8_UNORM = 2, + VIRGL_FORMAT_A8R8G8B8_UNORM = 3, + VIRGL_FORMAT_X8R8G8B8_UNORM = 4, + VIRGL_FORMAT_B5G5R5A1_UNORM = 5, + VIRGL_FORMAT_B4G4R4A4_UNORM = 6, + VIRGL_FORMAT_B5G6R5_UNORM = 7, + VIRGL_FORMAT_L8_UNORM = 9, /**< ubyte luminance */ + VIRGL_FORMAT_A8_UNORM = 10, /**< ubyte alpha */ + VIRGL_FORMAT_L8A8_UNORM = 12, /**< ubyte alpha, luminance */ + VIRGL_FORMAT_L16_UNORM = 13, /**< ushort luminance */ + + VIRGL_FORMAT_Z16_UNORM = 16, + VIRGL_FORMAT_Z32_UNORM = 17, + VIRGL_FORMAT_Z32_FLOAT = 18, + VIRGL_FORMAT_Z24_UNORM_S8_UINT = 19, + VIRGL_FORMAT_S8_UINT_Z24_UNORM = 20, + VIRGL_FORMAT_Z24X8_UNORM = 21, + VIRGL_FORMAT_S8_UINT = 23, /**< ubyte stencil */ + + VIRGL_FORMAT_R32_FLOAT = 28, + VIRGL_FORMAT_R32G32_FLOAT = 29, + VIRGL_FORMAT_R32G32B32_FLOAT = 30, + VIRGL_FORMAT_R32G32B32A32_FLOAT = 31, + + VIRGL_FORMAT_R16_UNORM = 48, + VIRGL_FORMAT_R16G16_UNORM = 49, + + VIRGL_FORMAT_R16G16B16A16_UNORM = 51, + + VIRGL_FORMAT_R16_SNORM = 56, + VIRGL_FORMAT_R16G16_SNORM = 57, + VIRGL_FORMAT_R16G16B16A16_SNORM = 59, + + VIRGL_FORMAT_R8_UNORM = 64, + VIRGL_FORMAT_R8G8_UNORM = 65, + + VIRGL_FORMAT_R8G8B8A8_UNORM = 67, + + VIRGL_FORMAT_R8_SNORM = 74, + VIRGL_FORMAT_R8G8_SNORM = 75, + VIRGL_FORMAT_R8G8B8_SNORM = 76, + VIRGL_FORMAT_R8G8B8A8_SNORM = 77, + + VIRGL_FORMAT_R16_FLOAT = 91, + VIRGL_FORMAT_R16G16_FLOAT = 92, + VIRGL_FORMAT_R16G16B16_FLOAT = 93, + VIRGL_FORMAT_R16G16B16A16_FLOAT = 94, + + VIRGL_FORMAT_L8_SRGB = 95, + VIRGL_FORMAT_L8A8_SRGB = 
96, + VIRGL_FORMAT_B8G8R8A8_SRGB = 100, + VIRGL_FORMAT_B8G8R8X8_SRGB = 101, + + /* compressed formats */ + VIRGL_FORMAT_DXT1_RGB = 105, + VIRGL_FORMAT_DXT1_RGBA = 106, + VIRGL_FORMAT_DXT3_RGBA = 107, + VIRGL_FORMAT_DXT5_RGBA = 108, + + /* sRGB, compressed */ + VIRGL_FORMAT_DXT1_SRGB = 109, + VIRGL_FORMAT_DXT1_SRGBA = 110, + VIRGL_FORMAT_DXT3_SRGBA = 111, + VIRGL_FORMAT_DXT5_SRGBA = 112, + + /* rgtc compressed */ + VIRGL_FORMAT_RGTC1_UNORM = 113, + VIRGL_FORMAT_RGTC1_SNORM = 114, + VIRGL_FORMAT_RGTC2_UNORM = 115, + VIRGL_FORMAT_RGTC2_SNORM = 116, + + VIRGL_FORMAT_A8B8G8R8_UNORM = 121, + VIRGL_FORMAT_B5G5R5X1_UNORM = 122, + VIRGL_FORMAT_R11G11B10_FLOAT = 124, + VIRGL_FORMAT_R9G9B9E5_FLOAT = 125, + VIRGL_FORMAT_Z32_FLOAT_S8X24_UINT = 126, + + VIRGL_FORMAT_B10G10R10A2_UNORM = 131, + VIRGL_FORMAT_R8G8B8X8_UNORM = 134, + VIRGL_FORMAT_B4G4R4X4_UNORM = 135, + VIRGL_FORMAT_B2G3R3_UNORM = 139, + + VIRGL_FORMAT_L16A16_UNORM = 140, + VIRGL_FORMAT_A16_UNORM = 141, + + VIRGL_FORMAT_A8_SNORM = 147, + VIRGL_FORMAT_L8_SNORM = 148, + VIRGL_FORMAT_L8A8_SNORM = 149, + + VIRGL_FORMAT_A16_SNORM = 151, + VIRGL_FORMAT_L16_SNORM = 152, + VIRGL_FORMAT_L16A16_SNORM = 153, + + VIRGL_FORMAT_A16_FLOAT = 155, + VIRGL_FORMAT_L16_FLOAT = 156, + VIRGL_FORMAT_L16A16_FLOAT = 157, + + VIRGL_FORMAT_A32_FLOAT = 159, + VIRGL_FORMAT_L32_FLOAT = 160, + VIRGL_FORMAT_L32A32_FLOAT = 161, + + VIRGL_FORMAT_R8_UINT = 177, + VIRGL_FORMAT_R8G8_UINT = 178, + VIRGL_FORMAT_R8G8B8_UINT = 179, + VIRGL_FORMAT_R8G8B8A8_UINT = 180, + + VIRGL_FORMAT_R8_SINT = 181, + VIRGL_FORMAT_R8G8_SINT = 182, + VIRGL_FORMAT_R8G8B8_SINT = 183, + VIRGL_FORMAT_R8G8B8A8_SINT = 184, + + VIRGL_FORMAT_R16_UINT = 185, + VIRGL_FORMAT_R16G16_UINT = 186, + VIRGL_FORMAT_R16G16B16_UINT = 187, + VIRGL_FORMAT_R16G16B16A16_UINT = 188, + + VIRGL_FORMAT_R16_SINT = 189, + VIRGL_FORMAT_R16G16_SINT = 190, + VIRGL_FORMAT_R16G16B16_SINT = 191, + VIRGL_FORMAT_R16G16B16A16_SINT = 192, + VIRGL_FORMAT_R32_UINT = 193, + VIRGL_FORMAT_R32G32_UINT = 194, + 
VIRGL_FORMAT_R32G32B32_UINT = 195, + VIRGL_FORMAT_R32G32B32A32_UINT = 196, + + VIRGL_FORMAT_R32_SINT = 197, + VIRGL_FORMAT_R32G32_SINT = 198, + VIRGL_FORMAT_R32G32B32_SINT = 199, + VIRGL_FORMAT_R32G32B32A32_SINT = 200, + + VIRGL_FORMAT_A8_UINT = 201, + VIRGL_FORMAT_L8_UINT = 203, + VIRGL_FORMAT_L8A8_UINT = 204, + + VIRGL_FORMAT_A8_SINT = 205, + VIRGL_FORMAT_L8_SINT = 207, + VIRGL_FORMAT_L8A8_SINT = 208, + + VIRGL_FORMAT_A16_UINT = 209, + VIRGL_FORMAT_L16_UINT = 211, + VIRGL_FORMAT_L16A16_UINT = 212, + + VIRGL_FORMAT_A16_SINT = 213, + VIRGL_FORMAT_L16_SINT = 215, + VIRGL_FORMAT_L16A16_SINT = 216, + + VIRGL_FORMAT_A32_UINT = 217, + VIRGL_FORMAT_L32_UINT = 219, + VIRGL_FORMAT_L32A32_UINT = 220, + + VIRGL_FORMAT_A32_SINT = 221, + VIRGL_FORMAT_L32_SINT = 223, + VIRGL_FORMAT_L32A32_SINT = 224, + + VIRGL_FORMAT_B10G10R10A2_UINT = 225, + VIRGL_FORMAT_R8G8B8X8_SNORM = 229, + + VIRGL_FORMAT_R8G8B8X8_SRGB = 230, + + VIRGL_FORMAT_B10G10R10X2_UNORM = 233, + VIRGL_FORMAT_R16G16B16X16_UNORM = 234, + VIRGL_FORMAT_R16G16B16X16_SNORM = 235, + VIRGL_FORMAT_MAX, +}; + +#define VIRGL_BIND_DEPTH_STENCIL (1 << 0) +#define VIRGL_BIND_RENDER_TARGET (1 << 1) +#define VIRGL_BIND_SAMPLER_VIEW (1 << 3) +#define VIRGL_BIND_VERTEX_BUFFER (1 << 4) +#define VIRGL_BIND_INDEX_BUFFER (1 << 5) +#define VIRGL_BIND_CONSTANT_BUFFER (1 << 6) +#define VIRGL_BIND_DISPLAY_TARGET (1 << 7) +#define VIRGL_BIND_STREAM_OUTPUT (1 << 11) +#define VIRGL_BIND_CURSOR (1 << 16) +#define VIRGL_BIND_CUSTOM (1 << 17) +#define VIRGL_BIND_SCANOUT (1 << 18) + +struct virgl_caps_bool_set1 { + unsigned indep_blend_enable:1; + unsigned indep_blend_func:1; + unsigned cube_map_array:1; + unsigned shader_stencil_export:1; + unsigned conditional_render:1; + unsigned start_instance:1; + unsigned primitive_restart:1; + unsigned blend_eq_sep:1; + unsigned instanceid:1; + unsigned vertex_element_instance_divisor:1; + unsigned seamless_cube_map:1; + unsigned occlusion_query:1; + unsigned timer_query:1; + unsigned 
streamout_pause_resume:1; + unsigned texture_multisample:1; + unsigned fragment_coord_conventions:1; + unsigned depth_clip_disable:1; + unsigned seamless_cube_map_per_texture:1; + unsigned ubo:1; + unsigned color_clamping:1; /* not in GL 3.1 core profile */ + unsigned poly_stipple:1; /* not in GL 3.1 core profile */ + unsigned mirror_clamp:1; + unsigned texture_query_lod:1; +}; + +/* endless expansion capabilities - current gallium has 252 formats; bitmask holds 16 * 32 = 512 bits */ +struct virgl_supported_format_mask { + uint32_t bitmask[16]; +}; +/* capabilities set 2 - version 1 - 32-bit and float values */ +struct virgl_caps_v1 { + uint32_t max_version; + struct virgl_supported_format_mask sampler; + struct virgl_supported_format_mask render; + struct virgl_supported_format_mask depthstencil; + struct virgl_supported_format_mask vertexbuffer; + struct virgl_caps_bool_set1 bset; + uint32_t glsl_level; + uint32_t max_texture_array_layers; + uint32_t max_streamout_buffers; + uint32_t max_dual_source_render_targets; + uint32_t max_render_targets; + uint32_t max_samples; + uint32_t prim_mask; + uint32_t max_tbo_size; + uint32_t max_uniform_blocks; + uint32_t max_viewports; + uint32_t max_texture_gather_components; +}; + +union virgl_caps { + uint32_t max_version; + struct virgl_caps_v1 v1; +}; + +enum virgl_errors { + VIRGL_ERROR_NONE, + VIRGL_ERROR_UNKNOWN, + VIRGL_ERROR_UNKNOWN_RESOURCE_FORMAT, +}; + +enum virgl_ctx_errors { + VIRGL_ERROR_CTX_NONE, + VIRGL_ERROR_CTX_UNKNOWN, + VIRGL_ERROR_CTX_ILLEGAL_SHADER, + VIRGL_ERROR_CTX_ILLEGAL_HANDLE, + VIRGL_ERROR_CTX_ILLEGAL_RESOURCE, + VIRGL_ERROR_CTX_ILLEGAL_SURFACE, + VIRGL_ERROR_CTX_ILLEGAL_VERTEX_FORMAT, + VIRGL_ERROR_CTX_ILLEGAL_CMD_BUFFER, +}; + + +#define VIRGL_RESOURCE_Y_0_TOP (1 << 0) +#endif diff --git a/src/gallium/drivers/virgl/virgl_protocol.h b/src/gallium/drivers/virgl/virgl_protocol.h new file mode 100644 index 00000000000..ca3142f5f72 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_protocol.h @@ -0,0 +1,468 @@ +/* + * Copyright 2014, 
2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_PROTOCOL_H +#define VIRGL_PROTOCOL_H + +#define VIRGL_QUERY_STATE_NEW 0 +#define VIRGL_QUERY_STATE_DONE 1 +#define VIRGL_QUERY_STATE_WAIT_HOST 2 + +struct virgl_host_query_state { + uint32_t query_state; + uint32_t result_size; + uint64_t result; +}; + +enum virgl_object_type { + VIRGL_OBJECT_NULL, + VIRGL_OBJECT_BLEND, + VIRGL_OBJECT_RASTERIZER, + VIRGL_OBJECT_DSA, + VIRGL_OBJECT_SHADER, + VIRGL_OBJECT_VERTEX_ELEMENTS, + VIRGL_OBJECT_SAMPLER_VIEW, + VIRGL_OBJECT_SAMPLER_STATE, + VIRGL_OBJECT_SURFACE, + VIRGL_OBJECT_QUERY, + VIRGL_OBJECT_STREAMOUT_TARGET, + VIRGL_MAX_OBJECTS, +}; + +/* context cmds to be encoded in the command stream */ +enum virgl_context_cmd { + VIRGL_CCMD_NOP = 0, + VIRGL_CCMD_CREATE_OBJECT = 1, + VIRGL_CCMD_BIND_OBJECT, + VIRGL_CCMD_DESTROY_OBJECT, + VIRGL_CCMD_SET_VIEWPORT_STATE, + VIRGL_CCMD_SET_FRAMEBUFFER_STATE, + VIRGL_CCMD_SET_VERTEX_BUFFERS, + VIRGL_CCMD_CLEAR, + VIRGL_CCMD_DRAW_VBO, + VIRGL_CCMD_RESOURCE_INLINE_WRITE, + VIRGL_CCMD_SET_SAMPLER_VIEWS, + VIRGL_CCMD_SET_INDEX_BUFFER, + VIRGL_CCMD_SET_CONSTANT_BUFFER, + VIRGL_CCMD_SET_STENCIL_REF, + VIRGL_CCMD_SET_BLEND_COLOR, + VIRGL_CCMD_SET_SCISSOR_STATE, + VIRGL_CCMD_BLIT, + VIRGL_CCMD_RESOURCE_COPY_REGION, + VIRGL_CCMD_BIND_SAMPLER_STATES, + VIRGL_CCMD_BEGIN_QUERY, + VIRGL_CCMD_END_QUERY, + VIRGL_CCMD_GET_QUERY_RESULT, + VIRGL_CCMD_SET_POLYGON_STIPPLE, + VIRGL_CCMD_SET_CLIP_STATE, + VIRGL_CCMD_SET_SAMPLE_MASK, + VIRGL_CCMD_SET_STREAMOUT_TARGETS, + VIRGL_CCMD_SET_RENDER_CONDITION, + VIRGL_CCMD_SET_UNIFORM_BUFFER, + + VIRGL_CCMD_SET_SUB_CTX, + VIRGL_CCMD_CREATE_SUB_CTX, + VIRGL_CCMD_DESTROY_SUB_CTX, + VIRGL_CCMD_BIND_SHADER, +}; + +/* + 8-bit cmd headers + 8-bit object type + 16-bit length +*/ + +#define VIRGL_CMD0(cmd, obj, len) ((cmd) | ((obj) << 8) | ((len) << 16)) + +/* hw specification */ +#define VIRGL_MAX_COLOR_BUFS 8 +#define VIRGL_MAX_CLIP_PLANES 8 + +#define VIRGL_OBJ_CREATE_HEADER 0 +#define VIRGL_OBJ_CREATE_HANDLE 1 + +#define VIRGL_OBJ_BIND_HEADER 0 
+#define VIRGL_OBJ_BIND_HANDLE 1 + +#define VIRGL_OBJ_DESTROY_HANDLE 1 + +/* some of these defines are a specification - not used in the code */ +/* bit offsets for blend state object */ +/* each field macro masks its argument first, then shifts it to the field's bit offset */ +#define VIRGL_OBJ_BLEND_SIZE (VIRGL_MAX_COLOR_BUFS + 3) +#define VIRGL_OBJ_BLEND_HANDLE 1 +#define VIRGL_OBJ_BLEND_S0 2 +#define VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_BLEND_S0_DITHER(x) (((x) & 0x1) << 2) +#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(x) (((x) & 0x1) << 3) +#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(x) (((x) & 0x1) << 4) +#define VIRGL_OBJ_BLEND_S1 3 +#define VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(x) (((x) & 0xf) << 0) +/* repeated once per number of cbufs */ + +#define VIRGL_OBJ_BLEND_S2(cbuf) (4 + (cbuf)) +#define VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(x) (((x) & 0x7) << 1) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(x) (((x) & 0x1f) << 4) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(x) (((x) & 0x1f) << 9) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(x) (((x) & 0x7) << 14) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(x) (((x) & 0x1f) << 17) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(x) (((x) & 0x1f) << 22) +#define VIRGL_OBJ_BLEND_S2_RT_COLORMASK(x) (((x) & 0xf) << 27) + +/* bit offsets for DSA state */ +#define VIRGL_OBJ_DSA_SIZE 5 +#define VIRGL_OBJ_DSA_HANDLE 1 +#define VIRGL_OBJ_DSA_S0 2 +#define VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_DSA_S0_DEPTH_FUNC(x) (((x) & 0x7) << 2) +#define VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(x) (((x) & 0x1) << 8) +#define VIRGL_OBJ_DSA_S0_ALPHA_FUNC(x) (((x) & 0x7) << 9) +#define VIRGL_OBJ_DSA_S1 3 +#define VIRGL_OBJ_DSA_S2 4 +#define VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_DSA_S1_STENCIL_FUNC(x) (((x) & 0x7) << 1) +#define 
VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(x) (((x) & 0x7) << 4) +#define VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(x) (((x) & 0x7) << 7) +#define VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(x) (((x) & 0x7) << 10) +#define VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(x) (((x) & 0xff) << 13) +#define VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(x) (((x) & 0xff) << 21) +#define VIRGL_OBJ_DSA_ALPHA_REF 5 + +/* offsets for rasterizer state */ +#define VIRGL_OBJ_RS_SIZE 9 +#define VIRGL_OBJ_RS_HANDLE 1 +#define VIRGL_OBJ_RS_S0 2 +#define VIRGL_OBJ_RS_S0_FLATSHADE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_RS_S0_DEPTH_CLIP(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_RS_S0_CLIP_HALFZ(x) (((x) & 0x1) << 2) +#define VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(x) (((x) & 0x1) << 3) +#define VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(x) (((x) & 0x1) << 4) +#define VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(x) (((x) & 0x1) << 5) +#define VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(x) (((x) & 0x1) << 6) +#define VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(x) (((x) & 0x1) << 7) +#define VIRGL_OBJ_RS_S0_CULL_FACE(x) (((x) & 0x3) << 8) +#define VIRGL_OBJ_RS_S0_FILL_FRONT(x) (((x) & 0x3) << 10) +#define VIRGL_OBJ_RS_S0_FILL_BACK(x) (((x) & 0x3) << 12) +#define VIRGL_OBJ_RS_S0_SCISSOR(x) (((x) & 0x1) << 14) +#define VIRGL_OBJ_RS_S0_FRONT_CCW(x) (((x) & 0x1) << 15) +#define VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(x) (((x) & 0x1) << 16) +#define VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(x) (((x) & 0x1) << 17) +#define VIRGL_OBJ_RS_S0_OFFSET_LINE(x) (((x) & 0x1) << 18) +#define VIRGL_OBJ_RS_S0_OFFSET_POINT(x) (((x) & 0x1) << 19) +#define VIRGL_OBJ_RS_S0_OFFSET_TRI(x) (((x) & 0x1) << 20) +#define VIRGL_OBJ_RS_S0_POLY_SMOOTH(x) (((x) & 0x1) << 21) +#define VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(x) (((x) & 0x1) << 22) +#define VIRGL_OBJ_RS_S0_POINT_SMOOTH(x) (((x) & 0x1) << 23) +#define VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(x) (((x) & 0x1) << 24) +#define VIRGL_OBJ_RS_S0_MULTISAMPLE(x) (((x) & 0x1) << 25) +#define VIRGL_OBJ_RS_S0_LINE_SMOOTH(x) (((x) & 0x1) << 26) +#define 
VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 27) +#define VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(x) (((x) & 0x1) << 28) +#define VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(x) (((x) & 0x1) << 29) +#define VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(x) (((x) & 0x1) << 30) + +#define VIRGL_OBJ_RS_POINT_SIZE 3 +#define VIRGL_OBJ_RS_SPRITE_COORD_ENABLE 4 +#define VIRGL_OBJ_RS_S3 5 + +#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(x) (((x) & 0xffff) << 0) +#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(x) (((x) & 0xff) << 16) +#define VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(x) (((x) & 0xff) << 24) +#define VIRGL_OBJ_RS_LINE_WIDTH 6 +#define VIRGL_OBJ_RS_OFFSET_UNITS 7 +#define VIRGL_OBJ_RS_OFFSET_SCALE 8 +#define VIRGL_OBJ_RS_OFFSET_CLAMP 9 + +#define VIRGL_OBJ_CLEAR_SIZE 8 +#define VIRGL_OBJ_CLEAR_BUFFERS 1 +#define VIRGL_OBJ_CLEAR_COLOR_0 2 /* color is 4 * u32/f32/i32 */ +#define VIRGL_OBJ_CLEAR_COLOR_1 3 +#define VIRGL_OBJ_CLEAR_COLOR_2 4 +#define VIRGL_OBJ_CLEAR_COLOR_3 5 +#define VIRGL_OBJ_CLEAR_DEPTH_0 6 /* depth is a double precision float */ +#define VIRGL_OBJ_CLEAR_DEPTH_1 7 +#define VIRGL_OBJ_CLEAR_STENCIL 8 + +/* shader object */ +#define VIRGL_OBJ_SHADER_HDR_SIZE(nso) (5 + ((nso) ? 
(2 * nso) + 4 : 0)) +#define VIRGL_OBJ_SHADER_HANDLE 1 +#define VIRGL_OBJ_SHADER_TYPE 2 +#define VIRGL_OBJ_SHADER_OFFSET 3 +#define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0) +/* start contains full length in VAL - also implies continuations */ +/* continuation contains offset in VAL */ +#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31) +#define VIRGL_OBJ_SHADER_NUM_TOKENS 4 +#define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5 +#define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT0(x) (10 + (x * 2)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(x) (((x) & 0xff) << 0) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(x) (((x) & 0x3) << 8) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(x) (((x) & 0x7) << 10) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(x) (((x) & 0x7) << 13) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(x) (((x) & 0xffff) << 16) +#define VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(x) (11 + (x * 2)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_STREAM(x) (((x) & 0x03) << 0) + +/* viewport state */ +#define VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports) ((6 * num_viewports) + 1) +#define VIRGL_SET_VIEWPORT_START_SLOT 1 +#define VIRGL_SET_VIEWPORT_STATE_SCALE_0(x) (2 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_SCALE_1(x) (3 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_SCALE_2(x) (4 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_0(x) (5 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_1(x) (6 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_2(x) (7 + (x * 6)) + +/* framebuffer state */ +#define VIRGL_SET_FRAMEBUFFER_STATE_SIZE(nr_cbufs) (nr_cbufs + 2) +#define VIRGL_SET_FRAMEBUFFER_STATE_NR_CBUFS 1 +#define VIRGL_SET_FRAMEBUFFER_STATE_NR_ZSURF_HANDLE 2 +#define VIRGL_SET_FRAMEBUFFER_STATE_CBUF_HANDLE(x) ((x) + 3) + +/* vertex elements object */ +#define VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements) (((num_elements) * 4) + 1) +#define VIRGL_OBJ_VERTEX_ELEMENTS_HANDLE 1 +#define 
VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_OFFSET(x) (((x) * 4) + 2) /* repeated per VE */ +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_INSTANCE_DIVISOR(x) (((x) * 4) + 3) +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_VERTEX_BUFFER_INDEX(x) (((x) * 4) + 4) +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_FORMAT(x) (((x) * 4) + 5) + +/* vertex buffers */ +#define VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers) ((num_buffers) * 3) +#define VIRGL_SET_VERTEX_BUFFER_STRIDE(x) (((x) * 3) + 1) +#define VIRGL_SET_VERTEX_BUFFER_OFFSET(x) (((x) * 3) + 2) +#define VIRGL_SET_VERTEX_BUFFER_HANDLE(x) (((x) * 3) + 3) + +/* index buffer */ +#define VIRGL_SET_INDEX_BUFFER_SIZE(ib) (((ib) ? 2 : 0) + 1) +#define VIRGL_SET_INDEX_BUFFER_HANDLE 1 +#define VIRGL_SET_INDEX_BUFFER_INDEX_SIZE 2 /* only if sending an IB handle */ +#define VIRGL_SET_INDEX_BUFFER_OFFSET 3 /* only if sending an IB handle */ + +/* constant buffer */ +#define VIRGL_SET_CONSTANT_BUFFER_SHADER_TYPE 1 +#define VIRGL_SET_CONSTANT_BUFFER_INDEX 2 +#define VIRGL_SET_CONSTANT_BUFFER_DATA_START 3 + +#define VIRGL_SET_UNIFORM_BUFFER_SIZE 5 +#define VIRGL_SET_UNIFORM_BUFFER_SHADER_TYPE 1 +#define VIRGL_SET_UNIFORM_BUFFER_INDEX 2 +#define VIRGL_SET_UNIFORM_BUFFER_OFFSET 3 +#define VIRGL_SET_UNIFORM_BUFFER_LENGTH 4 +#define VIRGL_SET_UNIFORM_BUFFER_RES_HANDLE 5 + +/* draw VBO */ +#define VIRGL_DRAW_VBO_SIZE 12 +#define VIRGL_DRAW_VBO_START 1 +#define VIRGL_DRAW_VBO_COUNT 2 +#define VIRGL_DRAW_VBO_MODE 3 +#define VIRGL_DRAW_VBO_INDEXED 4 +#define VIRGL_DRAW_VBO_INSTANCE_COUNT 5 +#define VIRGL_DRAW_VBO_INDEX_BIAS 6 +#define VIRGL_DRAW_VBO_START_INSTANCE 7 +#define VIRGL_DRAW_VBO_PRIMITIVE_RESTART 8 +#define VIRGL_DRAW_VBO_RESTART_INDEX 9 +#define VIRGL_DRAW_VBO_MIN_INDEX 10 +#define VIRGL_DRAW_VBO_MAX_INDEX 11 +#define VIRGL_DRAW_VBO_COUNT_FROM_SO 12 + +/* create surface */ +#define VIRGL_OBJ_SURFACE_SIZE 5 +#define VIRGL_OBJ_SURFACE_HANDLE 1 +#define VIRGL_OBJ_SURFACE_RES_HANDLE 2 +#define VIRGL_OBJ_SURFACE_FORMAT 3 +#define VIRGL_OBJ_SURFACE_BUFFER_FIRST_ELEMENT 
4 +#define VIRGL_OBJ_SURFACE_BUFFER_LAST_ELEMENT 5 +#define VIRGL_OBJ_SURFACE_TEXTURE_LEVEL 4 +#define VIRGL_OBJ_SURFACE_TEXTURE_LAYERS 5 + +/* create streamout target */ +#define VIRGL_OBJ_STREAMOUT_SIZE 4 +#define VIRGL_OBJ_STREAMOUT_HANDLE 1 +#define VIRGL_OBJ_STREAMOUT_RES_HANDLE 2 +#define VIRGL_OBJ_STREAMOUT_BUFFER_OFFSET 3 +#define VIRGL_OBJ_STREAMOUT_BUFFER_SIZE 4 + +/* sampler state */ +#define VIRGL_OBJ_SAMPLER_STATE_SIZE 9 +#define VIRGL_OBJ_SAMPLER_STATE_HANDLE 1 +#define VIRGL_OBJ_SAMPLER_STATE_S0 2 +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(x) (((x) & 0x7) << 0) +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(x) (((x) & 0x7) << 3) +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(x) (((x) & 0x7) << 6) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(x) (((x) & 0x3) << 9) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(x) (((x) & 0x3) << 11) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(x) (((x) & 0x3) << 13) +#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(x) (((x) & 0x1) << 15) +#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(x) (((x) & 0x7) << 16) + +#define VIRGL_OBJ_SAMPLER_STATE_LOD_BIAS 3 +#define VIRGL_OBJ_SAMPLER_STATE_MIN_LOD 4 +#define VIRGL_OBJ_SAMPLER_STATE_MAX_LOD 5 +#define VIRGL_OBJ_SAMPLER_STATE_BORDER_COLOR(x) ((x) + 6) /* 6 - 9 */ + + +/* sampler view */ +#define VIRGL_OBJ_SAMPLER_VIEW_SIZE 6 +#define VIRGL_OBJ_SAMPLER_VIEW_HANDLE 1 +#define VIRGL_OBJ_SAMPLER_VIEW_RES_HANDLE 2 +#define VIRGL_OBJ_SAMPLER_VIEW_FORMAT 3 +#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_FIRST_ELEMENT 4 +#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_LAST_ELEMENT 5 +#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LAYER 4 +#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LEVEL 5 +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE 6 +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(x) (((x) & 0x7) << 0) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(x) (((x) & 0x7) << 3) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(x) (((x) & 0x7) << 6) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(x) (((x) & 0x7) << 9) + +/* set sampler views */ 
+#define VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views) ((num_views) + 2) +#define VIRGL_SET_SAMPLER_VIEWS_SHADER_TYPE 1 +#define VIRGL_SET_SAMPLER_VIEWS_START_SLOT 2 +#define VIRGL_SET_SAMPLER_VIEWS_V0_HANDLE 3 + +/* bind sampler states */ +#define VIRGL_BIND_SAMPLER_STATES(num_states) ((num_states) + 2) +#define VIRGL_BIND_SAMPLER_STATES_SHADER_TYPE 1 +#define VIRGL_BIND_SAMPLER_STATES_START_SLOT 2 +#define VIRGL_BIND_SAMPLER_STATES_S0_HANDLE 3 + +/* set stencil reference */ +#define VIRGL_SET_STENCIL_REF_SIZE 1 +#define VIRGL_SET_STENCIL_REF 1 +#define VIRGL_STENCIL_REF_VAL(f, s) ((f & 0xff) | (((s & 0xff) << 8))) + +/* set blend color */ +#define VIRGL_SET_BLEND_COLOR_SIZE 4 +#define VIRGL_SET_BLEND_COLOR(x) ((x) + 1) + +/* set scissor state */ +#define VIRGL_SET_SCISSOR_STATE_SIZE(x) (1 + 2 * x) +#define VIRGL_SET_SCISSOR_START_SLOT 1 +#define VIRGL_SET_SCISSOR_MINX_MINY(x) (2 + (x * 2)) +#define VIRGL_SET_SCISSOR_MAXX_MAXY(x) (3 + (x * 2)) + +/* resource copy region */ +#define VIRGL_CMD_RESOURCE_COPY_REGION_SIZE 13 +#define VIRGL_CMD_RCR_DST_RES_HANDLE 1 +#define VIRGL_CMD_RCR_DST_LEVEL 2 +#define VIRGL_CMD_RCR_DST_X 3 +#define VIRGL_CMD_RCR_DST_Y 4 +#define VIRGL_CMD_RCR_DST_Z 5 +#define VIRGL_CMD_RCR_SRC_RES_HANDLE 6 +#define VIRGL_CMD_RCR_SRC_LEVEL 7 +#define VIRGL_CMD_RCR_SRC_X 8 +#define VIRGL_CMD_RCR_SRC_Y 9 +#define VIRGL_CMD_RCR_SRC_Z 10 +#define VIRGL_CMD_RCR_SRC_W 11 +#define VIRGL_CMD_RCR_SRC_H 12 +#define VIRGL_CMD_RCR_SRC_D 13 + +/* blit */ +#define VIRGL_CMD_BLIT_SIZE 21 +#define VIRGL_CMD_BLIT_S0 1 +#define VIRGL_CMD_BLIT_S0_MASK(x) (((x) & 0xff) << 0) +#define VIRGL_CMD_BLIT_S0_FILTER(x) (((x) & 0x3) << 8) +#define VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(x) (((x) & 0x1) << 10) +#define VIRGL_CMD_BLIT_SCISSOR_MINX_MINY 2 +#define VIRGL_CMD_BLIT_SCISSOR_MAXX_MAXY 3 +#define VIRGL_CMD_BLIT_DST_RES_HANDLE 4 +#define VIRGL_CMD_BLIT_DST_LEVEL 5 +#define VIRGL_CMD_BLIT_DST_FORMAT 6 +#define VIRGL_CMD_BLIT_DST_X 7 +#define VIRGL_CMD_BLIT_DST_Y 8 +#define 
VIRGL_CMD_BLIT_DST_Z 9 +#define VIRGL_CMD_BLIT_DST_W 10 +#define VIRGL_CMD_BLIT_DST_H 11 +#define VIRGL_CMD_BLIT_DST_D 12 +#define VIRGL_CMD_BLIT_SRC_RES_HANDLE 13 +#define VIRGL_CMD_BLIT_SRC_LEVEL 14 +#define VIRGL_CMD_BLIT_SRC_FORMAT 15 +#define VIRGL_CMD_BLIT_SRC_X 16 +#define VIRGL_CMD_BLIT_SRC_Y 17 +#define VIRGL_CMD_BLIT_SRC_Z 18 +#define VIRGL_CMD_BLIT_SRC_W 19 +#define VIRGL_CMD_BLIT_SRC_H 20 +#define VIRGL_CMD_BLIT_SRC_D 21 + +/* query object */ +#define VIRGL_OBJ_QUERY_SIZE 4 +#define VIRGL_OBJ_QUERY_HANDLE 1 +#define VIRGL_OBJ_QUERY_TYPE_INDEX 2 +#define VIRGL_OBJ_QUERY_TYPE(x) (x & 0xffff) +#define VIRGL_OBJ_QUERY_INDEX(x) ((x & 0xffff) << 16) +#define VIRGL_OBJ_QUERY_OFFSET 3 +#define VIRGL_OBJ_QUERY_RES_HANDLE 4 + +#define VIRGL_QUERY_BEGIN_HANDLE 1 + +#define VIRGL_QUERY_END_HANDLE 1 + +#define VIRGL_QUERY_RESULT_HANDLE 1 +#define VIRGL_QUERY_RESULT_WAIT 2 + +/* render condition */ +#define VIRGL_RENDER_CONDITION_SIZE 3 +#define VIRGL_RENDER_CONDITION_HANDLE 1 +#define VIRGL_RENDER_CONDITION_CONDITION 2 +#define VIRGL_RENDER_CONDITION_MODE 3 + +/* resource inline write */ +#define VIRGL_RESOURCE_IW_RES_HANDLE 1 +#define VIRGL_RESOURCE_IW_LEVEL 2 +#define VIRGL_RESOURCE_IW_USAGE 3 +#define VIRGL_RESOURCE_IW_STRIDE 4 +#define VIRGL_RESOURCE_IW_LAYER_STRIDE 5 +#define VIRGL_RESOURCE_IW_X 6 +#define VIRGL_RESOURCE_IW_Y 7 +#define VIRGL_RESOURCE_IW_Z 8 +#define VIRGL_RESOURCE_IW_W 9 +#define VIRGL_RESOURCE_IW_H 10 +#define VIRGL_RESOURCE_IW_D 11 +#define VIRGL_RESOURCE_IW_DATA_START 12 + +/* set streamout targets */ +#define VIRGL_SET_STREAMOUT_TARGETS_APPEND_BITMASK 1 +#define VIRGL_SET_STREAMOUT_TARGETS_H0 2 + +/* set sample mask */ +#define VIRGL_SET_SAMPLE_MASK_SIZE 1 +#define VIRGL_SET_SAMPLE_MASK_MASK 1 + +/* set clip state */ +#define VIRGL_SET_CLIP_STATE_SIZE 32 +#define VIRGL_SET_CLIP_STATE_C0 1 + +/* polygon stipple */ +#define VIRGL_POLYGON_STIPPLE_SIZE 32 +#define VIRGL_POLYGON_STIPPLE_P0 1 + +#define VIRGL_BIND_SHADER_SIZE 2 +#define 
VIRGL_BIND_SHADER_HANDLE 1 +#define VIRGL_BIND_SHADER_TYPE 2 + +#endif diff --git a/src/gallium/drivers/virgl/virgl_public.h b/src/gallium/drivers/virgl/virgl_public.h new file mode 100644 index 00000000000..a3ea560df7b --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_public.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_PUBLIC_H +#define VIRGL_PUBLIC_H + +struct pipe_screen; +struct virgl_winsys; + +struct pipe_screen * +virgl_create_screen(struct virgl_winsys *vws); +#endif diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c new file mode 100644 index 00000000000..b0200556342 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_query.c @@ -0,0 +1,175 @@ +/* + * Copyright 2014, 2015 Red Hat. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+
+struct virgl_query {
+   uint32_t handle;               /* object handle used in encoded commands */
+   struct virgl_resource *buf;    /* sized for a struct virgl_host_query_state */
+
+   unsigned index;
+   unsigned type;
+   unsigned result_size;
+   unsigned result_gotten_sent;   /* nonzero while a result request is outstanding */
+};
+
+static inline struct virgl_query *virgl_query(struct pipe_query *q)
+{
+   return (struct virgl_query *)q;
+}
+
+/* Encode a render condition; a NULL query is encoded as handle 0. */
+static void virgl_render_condition(struct pipe_context *ctx,
+                                  struct pipe_query *q,
+                                  boolean condition,
+                                  uint mode)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   uint32_t handle = q ? virgl_query(q)->handle : 0;
+
+   virgl_encoder_render_condition(vctx, handle, condition, mode);
+}
+
+/* Create a query and the staging buffer its result is later read back from. */
+static struct pipe_query *virgl_create_query(struct pipe_context *ctx,
+                                            unsigned query_type, unsigned index)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query;
+   uint32_t handle;
+
+   query = CALLOC_STRUCT(virgl_query);
+   if (!query)
+      return NULL;
+
+   query->buf = (struct virgl_resource *)pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM,
+                                                            PIPE_USAGE_STAGING, sizeof(struct virgl_host_query_state));
+   if (!query->buf) {
+      FREE(query);
+      return NULL;
+   }
+
+   handle = virgl_object_assign_handle();
+   query->type = query_type;
+   query->index = index;
+   query->handle = handle;
+   query->buf->clean = FALSE;
+   virgl_encoder_create_query(vctx, handle, query_type, index, query->buf, 0);
+
+   return (struct pipe_query *)query;
+}
+
+/* Encode deletion of the query object, drop the buffer reference, free it. */
+static void virgl_destroy_query(struct pipe_context *ctx, struct pipe_query *q)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query = virgl_query(q);
+
+   virgl_encode_delete_object(vctx, query->handle, VIRGL_OBJECT_QUERY);
+
+   pipe_resource_reference((struct pipe_resource **)&query->buf, NULL);
+   FREE(query);
+}
+
+/* Clear the buffer's clean flag and encode the begin-query command. */
+static boolean virgl_begin_query(struct pipe_context *ctx, struct pipe_query *q)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query = virgl_query(q);
+
+   query->buf->clean = FALSE;
+   virgl_encoder_begin_query(vctx, query->handle);
+   return true;
+}
+
+/* Store WAIT_HOST in the first 4 bytes of the buffer, then encode end-query. */
+static void virgl_end_query(struct pipe_context *ctx, struct pipe_query *q)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query = virgl_query(q);
+   struct pipe_box box;
+   uint32_t qs = VIRGL_QUERY_STATE_WAIT_HOST;
+
+   u_box_1d(0, 4, &box);
+   virgl_transfer_inline_write(ctx, &query->buf->u.b, 0, PIPE_TRANSFER_WRITE,
+                               &box, &qs, 0, 0);
+
+   virgl_encoder_end_query(vctx, query->handle);
+}
+
+/* Read back a query result; returns FALSE when no result could be read. */
+static boolean virgl_get_query_result(struct pipe_context *ctx,
+                                     struct pipe_query *q, boolean wait,
+                                     union pipe_query_result *result)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query = virgl_query(q);
+   struct pipe_transfer *transfer;
+   struct virgl_host_query_state *host_state;
+
+   /* ask host for query result */
+   if (!query->result_gotten_sent) {
+      query->result_gotten_sent = 1;
+      virgl_encoder_get_query_result(vctx, query->handle, 0);
+      ctx->flush(ctx, NULL, 0);
+   }
+
+   /* map the result buffer; with wait set, remap until the state is DONE */
+ remap:
+   host_state = pipe_buffer_map(ctx, &query->buf->u.b,
+                                PIPE_TRANSFER_READ, &transfer);
+   if (!host_state)
+      return FALSE; /* map failed: nothing to read and nothing to unmap */
+
+   if (host_state->query_state != VIRGL_QUERY_STATE_DONE) {
+      pipe_buffer_unmap(ctx, transfer);
+      if (wait)
+         goto remap;
+      return FALSE;
+   }
+
+   if (query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED)
+      result->u64 = host_state->result;
+   else
+      result->u64 = (uint32_t)host_state->result;
+
+   pipe_buffer_unmap(ctx, transfer);
+   query->result_gotten_sent = 0;
+   return TRUE;
+}
+
+/* Install the query entry points defined in this file on the context. */
+void virgl_init_query_functions(struct virgl_context *vctx)
+{
+   vctx->base.render_condition = virgl_render_condition;
+   vctx->base.create_query = virgl_create_query;
+   vctx->base.destroy_query = virgl_destroy_query;
+   vctx->base.begin_query = virgl_begin_query;
+   vctx->base.end_query = virgl_end_query;
+   vctx->base.get_query_result = virgl_get_query_result;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
new file mode 100644
index 00000000000..0b2fc4ec497
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "virgl_context.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/* True for a synchronized map of a resource the winsys reports as referenced. */
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+                                struct virgl_resource *res,
+                                unsigned usage)
+{
+   struct virgl_screen *vs = virgl_screen(vctx->base.screen);
+
+   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+      return false;
+   return vs->vws->res_is_referenced(vs->vws, vctx->cbuf, res->hw_res) ? true : false;
+}
+
+/* No readback for clean resources, DISCARD_RANGE maps, or maps that are
+ * both WRITE and FLUSH_EXPLICIT; every other map needs one. */
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+                              struct virgl_resource *res,
+                              unsigned usage)
+{
+   const unsigned explicit_write =
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT;
+
+   if (res->clean || (usage & PIPE_TRANSFER_DISCARD_RANGE))
+      return false;
+   return (usage & explicit_write) != explicit_write;
+}
+
+/* Dispatch on the template target: buffers and textures have own creators. */
+static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen,
+                                                   const struct pipe_resource *templ)
+{
+   struct virgl_screen *vs = virgl_screen(screen);
+
+   return templ->target == PIPE_BUFFER ? virgl_buffer_create(vs, templ)
+                                       : virgl_texture_create(vs, templ);
+}
+
+/* Import from a winsys handle; PIPE_BUFFER targets yield NULL. */
+static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen,
+                                                        const struct pipe_resource *templ,
+                                                        struct winsys_handle *whandle)
+{
+   struct virgl_screen *vs = virgl_screen(screen);
+   if (templ->target != PIPE_BUFFER)
+      return virgl_texture_from_handle(vs, templ, whandle);
+   return NULL;
+}
+
+/* Install the screen-level resource entry points. */
+void virgl_init_screen_resource_functions(struct pipe_screen *screen)
+{
+   screen->resource_create = virgl_resource_create;
+   screen->resource_from_handle = virgl_resource_from_handle;
+   screen->resource_get_handle = u_resource_get_handle_vtbl;
+   screen->resource_destroy = u_resource_destroy_vtbl;
+}
+
+/* Install the context-level transfer entry points (u_*_vtbl helpers). */
+void virgl_init_context_resource_functions(struct pipe_context *ctx)
+{
+   ctx->transfer_map = u_transfer_map_vtbl;
+   ctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+   ctx->transfer_unmap = u_transfer_unmap_vtbl;
+   ctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.h b/src/gallium/drivers/virgl/virgl_resource.h
new file mode 100644
index 00000000000..bab9bcb9b4e
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VIRGL_RESOURCE_H
+#define VIRGL_RESOURCE_H
+
+#include "util/u_resource.h"
+#include "util/u_range.h"
+#include "util/list.h"
+#include "util/u_transfer.h"
+
+#include "virgl_hw.h"
+#define VR_MAX_TEXTURE_2D_LEVELS 15
+
+struct winsys_handle;
+struct virgl_screen;
+struct virgl_context;
+
+struct virgl_resource {
+   struct u_resource u;
+   struct virgl_hw_res *hw_res;
+   boolean clean; /* when set, virgl_res_needs_readback() reports no readback */
+};
+
+struct virgl_buffer {
+   struct virgl_resource base;
+
+   struct list_head flush_list;
+   boolean on_list;
+
+   /* The buffer range which is initialized (with a write transfer,
+    * streamout, DMA, or as a random access target). The rest of
+    * the buffer is considered invalid and can be mapped unsynchronized.
+    *
+    * This allows unsynchronized mapping of a buffer range which hasn't
+    * been used yet. It's for applications which forget to use
+    * the unsynchronized map flag and expect the driver to figure it out.
+    */
+   struct util_range valid_buffer_range;
+};
+
+struct virgl_texture {
+   struct virgl_resource base;
+
+   unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS];
+   unsigned stride[VR_MAX_TEXTURE_2D_LEVELS];
+};
+
+struct virgl_transfer {
+   struct pipe_transfer base;
+   uint32_t offset;
+   struct virgl_resource *resolve_tmp;
+};
+
+void virgl_resource_destroy(struct pipe_screen *screen,
+                            struct pipe_resource *resource);
+
+void virgl_init_screen_resource_functions(struct pipe_screen *screen);
+
+void virgl_init_context_resource_functions(struct pipe_context *ctx);
+
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+                                           const struct pipe_resource *templ);
+
+struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs,
+                                                const struct pipe_resource *templ,
+                                                struct winsys_handle *whandle);
+
+static inline struct virgl_resource *virgl_resource(struct pipe_resource *r)
+{
+   return (struct virgl_resource *)r;
+}
+
+static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r)
+{
+   return (struct virgl_buffer *)r;
+}
+
+static inline struct virgl_texture *virgl_texture(struct pipe_resource *r)
+{
+   return (struct virgl_texture *)r;
+}
+
+static inline struct virgl_transfer *virgl_transfer(struct pipe_transfer *trans)
+{
+   return (struct virgl_transfer *)trans;
+}
+
+struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs,
+                                          const struct pipe_resource *templ);
+
+/* Translate a mask of PIPE_BIND_* flags into the matching VIRGL_BIND_* mask. */
+static inline unsigned pipe_to_virgl_bind(unsigned pbind)
+{
+   /* gallium flag on the left, the virgl flag it maps to on the right */
+   const struct {
+      unsigned pipe, virgl;
+   } bind_map[] = {
+      { PIPE_BIND_DEPTH_STENCIL, VIRGL_BIND_DEPTH_STENCIL },
+      { PIPE_BIND_RENDER_TARGET, VIRGL_BIND_RENDER_TARGET },
+      { PIPE_BIND_SAMPLER_VIEW, VIRGL_BIND_SAMPLER_VIEW },
+      { PIPE_BIND_VERTEX_BUFFER, VIRGL_BIND_VERTEX_BUFFER },
+      { PIPE_BIND_INDEX_BUFFER, VIRGL_BIND_INDEX_BUFFER },
+      { PIPE_BIND_CONSTANT_BUFFER, VIRGL_BIND_CONSTANT_BUFFER },
+      { PIPE_BIND_DISPLAY_TARGET, VIRGL_BIND_DISPLAY_TARGET },
+      { PIPE_BIND_STREAM_OUTPUT, VIRGL_BIND_STREAM_OUTPUT },
+      { PIPE_BIND_CURSOR, VIRGL_BIND_CURSOR },
+      { PIPE_BIND_CUSTOM, VIRGL_BIND_CUSTOM },
+      { PIPE_BIND_SCANOUT, VIRGL_BIND_SCANOUT },
+   };
+   unsigned outbind = 0;
+   unsigned i;
+
+   for (i = 0; i < sizeof(bind_map) / sizeof(bind_map[0]); i++)
+      if (pbind & bind_map[i].pipe)
+         outbind |= bind_map[i].virgl;
+   return outbind;
+}
+
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+                                struct virgl_resource *res,
+                                unsigned usage);
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+                              struct virgl_resource *res,
+                              unsigned usage);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
new file mode 100644
index 00000000000..cca379d47ab
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+#include <limits.h>
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_video.h"
+#include "os/os_time.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_exec.h"
+
+#include "virgl_screen.h"
+#include "virgl_resource.h"
+#include "virgl_public.h"
+#include "virgl_context.h"
+
+#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */
+#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+#define SP_MAX_TEXTURE_CUBE_LEVELS 13 /* 4K x 4K */
+
+/** Return the fixed vendor string reported by this driver. */
+static const char *
+virgl_get_vendor(struct pipe_screen *screen)
+{
+   return "Red Hat";
+}
+
+
+/** Return the fixed driver name. */
+static const char *
+virgl_get_name(struct pipe_screen *screen)
+{
+   return "virgl";
+}
+
+/**
+ * Answer an integer PIPE_CAP query.
+ *
+ * Values are either hard-coded here or read from the capability set stored
+ * in the screen (vscreen->caps).  A cap that has no case label is logged
+ * through debug_printf() and reported as 0.
+ */
+static int
+virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   switch (param) {
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_SM3:
+      return 1;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 1;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return vscreen->caps.caps.v1.max_render_targets;
+   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+      return vscreen->caps.caps.v1.max_dual_source_render_targets;
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return vscreen->caps.caps.v1.bset.occlusion_query;
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+      return vscreen->caps.caps.v1.bset.mirror_clamp;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return SP_MAX_TEXTURE_2D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return SP_MAX_TEXTURE_3D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return SP_MAX_TEXTURE_CUBE_LEVELS;
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+      return 1;
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+      return vscreen->caps.caps.v1.bset.indep_blend_enable;
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+      return vscreen->caps.caps.v1.bset.indep_blend_func;
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+      return vscreen->caps.caps.v1.bset.fragment_coord_conventions;
+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+      return vscreen->caps.caps.v1.bset.depth_clip_disable;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return vscreen->caps.caps.v1.max_streamout_buffers;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return 16*4;
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return vscreen->caps.caps.v1.bset.primitive_restart;
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+      return vscreen->caps.caps.v1.bset.shader_stencil_export;
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+      return 1;
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+      return vscreen->caps.caps.v1.bset.seamless_cube_map;
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+      return vscreen->caps.caps.v1.bset.seamless_cube_map_per_texture;
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return vscreen->caps.caps.v1.max_texture_array_layers;
+   case PIPE_CAP_MIN_TEXEL_OFFSET:
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+      return -8;
+   case PIPE_CAP_MAX_TEXEL_OFFSET:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+      return 7;
+   case PIPE_CAP_CONDITIONAL_RENDER:
+      return vscreen->caps.caps.v1.bset.conditional_render;
+   case PIPE_CAP_TEXTURE_BARRIER:
+      return 0;
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+      return 1;
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+      return vscreen->caps.caps.v1.bset.color_clamping;
+   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+      return 1;
+   case PIPE_CAP_GLSL_FEATURE_LEVEL:
+      return vscreen->caps.caps.v1.glsl_level;
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+      return 0;
+   case PIPE_CAP_COMPUTE:
+      return 0;
+   case PIPE_CAP_USER_VERTEX_BUFFERS:
+      return 0;
+   case PIPE_CAP_USER_INDEX_BUFFERS:
+   case PIPE_CAP_USER_CONSTANT_BUFFERS:
+      return 1;
+   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      return 16;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return vscreen->caps.caps.v1.bset.streamout_pause_resume;
+   case PIPE_CAP_START_INSTANCE:
+      return vscreen->caps.caps.v1.bset.start_instance;
+   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 0;
+   case PIPE_CAP_QUERY_TIMESTAMP:
+      return 1;
+   case PIPE_CAP_QUERY_TIME_ELAPSED:
+      return 0;
+   case PIPE_CAP_TGSI_TEXCOORD:
+      return 0;
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return VIRGL_MAP_BUFFER_ALIGNMENT;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return vscreen->caps.caps.v1.max_tbo_size > 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+      return 0;
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return vscreen->caps.caps.v1.bset.cube_map_array;
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return vscreen->caps.caps.v1.bset.texture_multisample;
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return vscreen->caps.caps.v1.max_viewports;
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return vscreen->caps.caps.v1.max_tbo_size;
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+   case PIPE_CAP_ENDIANNESS:
+      return 0;
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return 1;
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+      return 0;
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return 1024;
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return 16384;
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+      return vscreen->caps.caps.v1.bset.texture_query_lod;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+      return vscreen->caps.caps.v1.max_texture_gather_components;
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_FAKE_SW_MSAA:
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+   case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+   case PIPE_CAP_CLIP_HALFZ:
+   case PIPE_CAP_VERTEXID_NOBASE:
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+   /* Added to enum pipe_cap by this same change; report it as unsupported
+    * explicitly instead of tripping the "unexpected cap" message below. */
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+      return 0;
+   case PIPE_CAP_VENDOR_ID:
+      return 0x1af4;
+   case PIPE_CAP_DEVICE_ID:
+      return 0x1010;
+   case PIPE_CAP_ACCELERATED:
+      return 1;
+   case PIPE_CAP_UMA:
+   case PIPE_CAP_VIDEO_MEMORY:
+      return 0;
+   }
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAP %d query\n", param);
+   return 0;
+}
+
+/**
+ * Answer a per-shader-stage capability query.
+ *
+ * Fragment, vertex and geometry stages share one table; every other stage,
+ * and every cap without a case label, reports 0.
+ */
+static int
+virgl_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   switch(shader)
+   {
+   case PIPE_SHADER_FRAGMENT:
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+      switch (param) {
+      case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+         return INT_MAX;
+      case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+         return 1;
+      case PIPE_SHADER_CAP_MAX_INPUTS:
+         if (vscreen->caps.caps.v1.glsl_level < 150)
+            return 16;
+         return shader == PIPE_SHADER_VERTEX ? 16 : 32;
+      case PIPE_SHADER_CAP_MAX_OUTPUTS:
+         return 128;
+         // case PIPE_SHADER_CAP_MAX_CONSTS:
+         //    return 4096;
+      case PIPE_SHADER_CAP_MAX_TEMPS:
+         return 256;
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+         return vscreen->caps.caps.v1.max_uniform_blocks;
+         // case PIPE_SHADER_CAP_MAX_ADDRS:
+         //    return 1;
+      case PIPE_SHADER_CAP_MAX_PREDS:
+         return 0;
+      case PIPE_SHADER_CAP_SUBROUTINES:
+         return 1;
+      case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+         return 16;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return vscreen->caps.caps.v1.glsl_level >= 130;
+      case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+         return 32;
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+         return 4096 * sizeof(float[4]);
+      default:
+         return 0;
+      }
+   default:
+      return 0;
+   }
+}
+
+/**
+ * Answer a floating-point PIPE_CAPF query.  All values are constants; a cap
+ * without a case label is logged through debug_printf() and reported as 0.0.
+ */
+static float
+virgl_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+      /* fall-through */
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+      return 255.0; /* arbitrary */
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+      /* fall-through */
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return 255.0; /* arbitrary */
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 16.0;
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 16.0; /* arbitrary */
+   case PIPE_CAPF_GUARD_BAND_LEFT:
+   case PIPE_CAPF_GUARD_BAND_TOP:
+   case PIPE_CAPF_GUARD_BAND_RIGHT:
+   case PIPE_CAPF_GUARD_BAND_BOTTOM:
+      return 0.0;
+   }
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+   return 0.0;
+}
+
+/**
+ * Decide whether \p format can be used as a vertex buffer format.
+ *
+ * PIPE_FORMAT_R11G11B10_FLOAT is looked up in the vertexbuffer capability
+ * bitmask.  Any other format is accepted when it has a non-VOID channel, a
+ * PLAIN layout, and its first non-VOID channel is not FIXED.
+ */
+static boolean
+virgl_is_vertex_format_supported(struct pipe_screen *screen,
+                                 enum pipe_format format)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   const struct util_format_description *format_desc;
+   int i;
+
+   format_desc = util_format_description(format);
+   if (!format_desc)
+      return FALSE;
+
+   if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      int vformat = VIRGL_FORMAT_R11G11B10_FLOAT;
+      int big = vformat / 32;
+      int small = vformat % 32;
+      /* 1u, not 1: small may be 31 and shifting a signed 1 into the sign
+       * bit is undefined behaviour. */
+      if (!(vscreen->caps.caps.v1.vertexbuffer.bitmask[big] & (1u << small)))
+         return FALSE;
+      return TRUE;
+   }
+
+   /* Find the first non-VOID channel. */
+   for (i = 0; i < 4; i++) {
+      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+         break;
+      }
+   }
+
+   if (i == 4)
+      return FALSE;
+
+   if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+      return FALSE;
+
+   if (format_desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
+      return FALSE;
+   return TRUE;
+}
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format        the format to test
+ * \param target        resource target; only checked by the assert below
+ * \param sample_count  requested sample count; values above 1 require the
+ *                      texture_multisample cap and must not exceed
+ *                      max_samples
+ * \param bind          PIPE_BIND_x flags describing the intended use
+ * \return TRUE when the format passes every check that applies to \p bind
+ */
+static boolean
+virgl_is_format_supported( struct pipe_screen *screen,
+                           enum pipe_format format,
+                           enum pipe_texture_target target,
+                           unsigned sample_count,
+                           unsigned bind)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   const struct util_format_description *format_desc;
+   int i;
+
+   assert(target == PIPE_BUFFER ||
+          target == PIPE_TEXTURE_1D ||
+          target == PIPE_TEXTURE_1D_ARRAY ||
+          target == PIPE_TEXTURE_2D ||
+          target == PIPE_TEXTURE_2D_ARRAY ||
+          target == PIPE_TEXTURE_RECT ||
+          target == PIPE_TEXTURE_3D ||
+          target == PIPE_TEXTURE_CUBE ||
+          target == PIPE_TEXTURE_CUBE_ARRAY);
+
+   format_desc = util_format_description(format);
+   if (!format_desc)
+      return FALSE;
+
+   if (util_format_is_intensity(format))
+      return FALSE;
+
+   if (sample_count > 1) {
+      if (!vscreen->caps.caps.v1.bset.texture_multisample)
+         return FALSE;
+      if (sample_count > vscreen->caps.caps.v1.max_samples)
+         return FALSE;
+   }
+
+   /* Vertex buffer binding short-circuits every other check. */
+   if (bind & PIPE_BIND_VERTEX_BUFFER) {
+      return virgl_is_vertex_format_supported(screen, format);
+   }
+
+   if (bind & PIPE_BIND_RENDER_TARGET) {
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      /*
+       * Although possible, it is unnatural to render into compressed or YUV
+       * surfaces. So disable these here to avoid going into weird paths
+       * inside the state trackers.
+       */
+      if (format_desc->block.width != 1 ||
+          format_desc->block.height != 1)
+         return FALSE;
+
+      {
+         int big = format / 32;
+         int small = format % 32;
+         /* 1u: avoid undefined signed shift when small == 31. */
+         if (!(vscreen->caps.caps.v1.render.bitmask[big] & (1u << small)))
+            return FALSE;
+      }
+   }
+
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
+      if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+   }
+
+   /*
+    * All other operations (sampling, transfer, etc).
+    */
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+      if (util_format_s3tc_enabled)
+         goto out_lookup;
+      return FALSE;
+   }
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+      goto out_lookup;
+   }
+
+   if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      goto out_lookup;
+   } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+      goto out_lookup;
+   }
+
+   /* Find the first non-VOID channel. */
+   for (i = 0; i < 4; i++) {
+      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+         break;
+      }
+   }
+
+   if (i == 4)
+      return FALSE;
+
+   /* no L4A4 */
+   if (format_desc->nr_channels < 4 && format_desc->channel[i].size == 4)
+      return FALSE;
+
+ out_lookup:
+   {
+      int big = format / 32;
+      int small = format % 32;
+      /* 1u: avoid undefined signed shift when small == 31. */
+      if (!(vscreen->caps.caps.v1.sampler.bitmask[big] & (1u << small)))
+         return FALSE;
+   }
+   /*
+    * Everything else should be supported by u_format.
+    */
+   return TRUE;
+}
+
+/**
+ * Forward a front-buffer flush to the winsys.  The hook is optional: nothing
+ * happens when the winsys does not provide flush_frontbuffer.
+ */
+static void virgl_flush_frontbuffer(struct pipe_screen *screen,
+                                    struct pipe_resource *res,
+                                    unsigned level, unsigned layer,
+                                    void *winsys_drawable_handle, struct pipe_box *sub_box)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   struct virgl_winsys *vws = vscreen->vws;
+   struct virgl_resource *vres = virgl_resource(res);
+
+   if (vws->flush_frontbuffer)
+      vws->flush_frontbuffer(vws, vres->hw_res, level, layer, winsys_drawable_handle,
+                             sub_box);
+}
+
+/** Delegate fence reference handling to the winsys. */
+static void virgl_fence_reference(struct pipe_screen *screen,
+                                  struct pipe_fence_handle **ptr,
+                                  struct pipe_fence_handle *fence)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   struct virgl_winsys *vws = vscreen->vws;
+
+   vws->fence_reference(vws, ptr, fence);
+}
+
+/** Wait on \p fence through the winsys and return its result. */
+static boolean virgl_fence_finish(struct pipe_screen *screen,
+                                  struct pipe_fence_handle *fence,
+                                  uint64_t timeout)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   struct virgl_winsys *vws = vscreen->vws;
+
+   return vws->fence_wait(vws, fence, timeout);
+}
+
+/** Return the value of os_time_get_nano() as the screen timestamp. */
+static uint64_t
+virgl_get_timestamp(struct pipe_screen *_screen)
+{
+   return os_time_get_nano();
+}
+
+/** Destroy the winsys (when present) and free the screen. */
+static void
+virgl_destroy_screen(struct pipe_screen *screen)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   struct virgl_winsys *vws = vscreen->vws;
+
+   if (vws)
+      vws->destroy(vws);
+   FREE(vscreen);
+}
+
+/**
+ * Allocate a virgl screen on top of \p vws, install the pipe_screen entry
+ * points and fetch the capability set from the winsys.
+ *
+ * \return the embedded pipe_screen, or NULL when allocation fails
+ */
+struct pipe_screen *
+virgl_create_screen(struct virgl_winsys *vws)
+{
+   struct virgl_screen *screen = CALLOC_STRUCT(virgl_screen);
+
+   if (!screen)
+      return NULL;
+
+   screen->vws = vws;
+   screen->base.get_name = virgl_get_name;
+   screen->base.get_vendor = virgl_get_vendor;
+   screen->base.get_param = virgl_get_param;
+   screen->base.get_shader_param = virgl_get_shader_param;
+   screen->base.get_paramf = virgl_get_paramf;
+   screen->base.is_format_supported = virgl_is_format_supported;
+   screen->base.destroy = virgl_destroy_screen;
+   screen->base.context_create = virgl_context_create;
+   screen->base.flush_frontbuffer = virgl_flush_frontbuffer;
+   screen->base.get_timestamp = virgl_get_timestamp;
+   screen->base.fence_reference = virgl_fence_reference;
+   //screen->base.fence_signalled = virgl_fence_signalled;
+   screen->base.fence_finish = virgl_fence_finish;
+
+   virgl_init_screen_resource_functions(&screen->base);
+
+   /* NOTE(review): the int result of get_caps is not checked here. */
+   vws->get_caps(vws, &screen->caps);
+
+
+   util_format_s3tc_init();
+   return &screen->base;
+}
diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h
new file mode 100644
index 00000000000..52e72ca4958
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_screen.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_H
+#define VIRGL_H
+
+#include "pipe/p_screen.h"
+#include "virgl_winsys.h"
+
+/* Driver screen object; the generic pipe_screen is embedded as the first
+ * member so the two can be converted with a cast (see virgl_screen()). */
+struct virgl_screen {
+   struct pipe_screen base;
+   struct virgl_winsys *vws;
+
+   struct virgl_drm_caps caps;
+
+   uint32_t sub_ctx_id;
+};
+
+
+/* Downcast a pipe_screen to the virgl_screen that embeds it. */
+static inline struct virgl_screen *
+virgl_screen(struct pipe_screen *pipe)
+{
+   return (struct virgl_screen *)pipe;
+}
+
+#define VIRGL_MAP_BUFFER_ALIGNMENT 64
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_streamout.c b/src/gallium/drivers/virgl/virgl_streamout.c
new file mode 100644
index 00000000000..b6a65fff29e
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_streamout.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+
+/**
+ * Create a stream-output target over \p buffer.
+ *
+ * Allocates the target, takes a reference on \p buffer, marks the backing
+ * resource as not clean and encodes the creation command with a freshly
+ * assigned object handle.
+ *
+ * \return the new target, or NULL when allocation fails
+ */
+static struct pipe_stream_output_target *virgl_create_so_target(
+   struct pipe_context *ctx,
+   struct pipe_resource *buffer,
+   unsigned buffer_offset,
+   unsigned buffer_size)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_resource *res = virgl_resource(buffer);
+   struct virgl_so_target *t = CALLOC_STRUCT(virgl_so_target);
+   uint32_t handle;
+
+   if (!t)
+      return NULL;
+   handle = virgl_object_assign_handle();
+
+   t->base.reference.count = 1;
+   t->base.context = ctx;
+   pipe_resource_reference(&t->base.buffer, buffer);
+   t->base.buffer_offset = buffer_offset;
+   t->base.buffer_size = buffer_size;
+   t->handle = handle;
+   res->clean = FALSE;
+   virgl_encoder_create_so_target(vctx, handle, res, buffer_offset, buffer_size);
+   return &t->base;
+}
+
+/**
+ * Destroy a stream-output target: drop its buffer reference, encode the
+ * object deletion and free the target.
+ */
+static void virgl_destroy_so_target(struct pipe_context *ctx,
+                                    struct pipe_stream_output_target *target)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_so_target *t = virgl_so_target(target);
+
+   pipe_resource_reference(&t->base.buffer, NULL);
+   virgl_encode_delete_object(vctx, t->handle, VIRGL_OBJECT_STREAMOUT_TARGET);
+   FREE(t);
+}
+
+/**
+ * Bind \p num_targets stream-output targets.
+ *
+ * The context keeps a buffer reference per bound slot; slots beyond
+ * \p num_targets that were bound before are released.  \p offset is not
+ * used: 0 is always passed to the encoder.
+ */
+static void virgl_set_so_targets(struct pipe_context *ctx,
+                                 unsigned num_targets,
+                                 struct pipe_stream_output_target **targets,
+                                 const unsigned *offset)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   unsigned i;
+
+   for (i = 0; i < num_targets; i++) {
+      /* A slot may be passed as NULL; treat it as "no buffer" instead of
+       * dereferencing it.
+       * NOTE(review): confirm virgl_encoder_set_so_targets() also accepts
+       * NULL entries. */
+      struct pipe_resource *buf = targets[i] ? targets[i]->buffer : NULL;
+
+      pipe_resource_reference(&vctx->so_targets[i].base.buffer, buf);
+   }
+   for (i = num_targets; i < vctx->num_so_targets; i++)
+      pipe_resource_reference(&vctx->so_targets[i].base.buffer, NULL);
+   vctx->num_so_targets = num_targets;
+   virgl_encoder_set_so_targets(vctx, num_targets, targets, 0);//append_bitmask);
+}
+
+/** Install the stream-output entry points on the context. */
+void virgl_init_so_functions(struct virgl_context *vctx)
+{
+   vctx->base.create_stream_output_target = virgl_create_so_target;
+   vctx->base.stream_output_target_destroy = virgl_destroy_so_target;
+   vctx->base.set_stream_output_targets = virgl_set_so_targets;
+}
diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c
new file mode 100644
index 00000000000..31189626144
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_texture.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "virgl_context.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/**
+ * Copy \p src_box of \p src into \p dst at (dstx, dsty, dstz) with a
+ * nearest-filtered blit.  Only the channels present in both formats are
+ * written; nothing is issued when the two formats share no channel.
+ */
+static void virgl_copy_region_with_blit(struct pipe_context *pipe,
+                                        struct pipe_resource *dst,
+                                        unsigned dst_level,
+                                        unsigned dstx, unsigned dsty, unsigned dstz,
+                                        struct pipe_resource *src,
+                                        unsigned src_level,
+                                        const struct pipe_box *src_box)
+{
+   struct pipe_blit_info blit;
+
+   memset(&blit, 0, sizeof(blit));
+   blit.src.resource = src;
+   blit.src.format = src->format;
+   blit.src.level = src_level;
+   blit.src.box = *src_box;
+   blit.dst.resource = dst;
+   blit.dst.format = dst->format;
+   blit.dst.level = dst_level;
+   blit.dst.box.x = dstx;
+   blit.dst.box.y = dsty;
+   blit.dst.box.z = dstz;
+   blit.dst.box.width = src_box->width;
+   blit.dst.box.height = src_box->height;
+   blit.dst.box.depth = src_box->depth;
+   blit.mask = util_format_get_mask(src->format) &
+      util_format_get_mask(dst->format);
+   blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+   if (blit.mask) {
+      pipe->blit(pipe, &blit);
+   }
+}
+/**
+ * Fill \p res as a template for a staging resource sized like \p box and
+ * using the format of \p orig.
+ */
+static void virgl_init_temp_resource_from_box(struct pipe_resource *res,
+                                              struct pipe_resource *orig,
+                                              const struct pipe_box *box,
+                                              unsigned level, unsigned flags)
+{
+   memset(res, 0, sizeof(*res));
+   res->format = orig->format;
+   res->width0 = box->width;
+   res->height0 = box->height;
+   res->depth0 = 1;
+   res->array_size = 1;
+   res->usage = PIPE_USAGE_STAGING;
+   res->flags = flags;
+
+   /* We must set the correct texture target and dimensions for a 3D box. */
+   if (box->depth > 1 && util_max_layer(orig, level) > 0)
+      res->target = orig->target;
+   else
+      res->target = PIPE_TEXTURE_2D;
+
+   switch (res->target) {
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      res->array_size = box->depth;
+      break;
+   case PIPE_TEXTURE_3D:
+      res->depth0 = box->depth;
+      break;
+   default:
+      break;
+   }
+}
+
+/**
+ * Return the byte offset of (\p level, \p layer) inside the texture's
+ * backing store, based on the per-level offsets and strides computed by
+ * vrend_resource_layout().
+ */
+static unsigned
+vrend_get_tex_image_offset(const struct virgl_texture *res,
+                           unsigned level, unsigned layer)
+{
+   const struct pipe_resource *pres = &res->base.u.b;
+   const unsigned hgt = u_minify(pres->height0, level);
+   const unsigned nblocksy = util_format_get_nblocksy(pres->format, hgt);
+   unsigned offset = res->level_offset[level];
+
+   if (pres->target == PIPE_TEXTURE_CUBE ||
+       pres->target == PIPE_TEXTURE_CUBE_ARRAY ||
+       pres->target == PIPE_TEXTURE_3D ||
+       pres->target == PIPE_TEXTURE_2D_ARRAY) {
+      offset += layer * nblocksy * res->stride[level];
+   }
+   else if (pres->target == PIPE_TEXTURE_1D_ARRAY) {
+      offset += layer * res->stride[level];
+   }
+   else {
+      assert(layer == 0);
+   }
+
+   return offset;
+}
+
+/**
+ * Map \p box of \p level of a texture for CPU access.
+ *
+ * Multisampled depth textures are first blitted into a temporary staging
+ * resource, which is then mapped in place of the texture itself.
+ *
+ * \param transfer  receives the transfer object on success
+ * \return pointer to the first byte of the box, or NULL on failure.  On
+ *         failure nothing stays allocated and \p transfer is not written.
+ */
+static void *virgl_texture_transfer_map(struct pipe_context *ctx,
+                                        struct pipe_resource *resource,
+                                        unsigned level,
+                                        unsigned usage,
+                                        const struct pipe_box *box,
+                                        struct pipe_transfer **transfer)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_screen *vs = virgl_screen(ctx->screen);
+   struct virgl_texture *vtex = virgl_texture(resource);
+   enum pipe_format format = resource->format;
+   struct virgl_transfer *trans;
+   void *ptr;
+   boolean readback;
+   uint32_t offset;
+   struct virgl_hw_res *hw_res;
+   const unsigned h = u_minify(vtex->base.u.b.height0, level);
+   const unsigned nblocksy = util_format_get_nblocksy(format, h);
+   bool is_depth = util_format_has_depth(util_format_description(resource->format));
+   uint32_t l_stride;
+   bool doflushwait;
+
+   doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage);
+   if (doflushwait)
+      ctx->flush(ctx, NULL, 0);
+
+   trans = util_slab_alloc(&vctx->texture_transfer_pool);
+   if (trans == NULL)
+      return NULL;
+
+   trans->base.resource = resource;
+   trans->base.level = level;
+   trans->base.usage = usage;
+   trans->base.box = *box;
+   trans->base.stride = vtex->stride[level];
+   trans->base.layer_stride = trans->base.stride * nblocksy;
+   /* Set up front so the failure path can test it unconditionally. */
+   trans->resolve_tmp = NULL;
+
+   if (resource->target != PIPE_TEXTURE_3D &&
+       resource->target != PIPE_TEXTURE_CUBE &&
+       resource->target != PIPE_TEXTURE_1D_ARRAY &&
+       resource->target != PIPE_TEXTURE_2D_ARRAY &&
+       resource->target != PIPE_TEXTURE_CUBE_ARRAY)
+      l_stride = 0;
+   else
+      l_stride = trans->base.layer_stride;
+
+   if (is_depth && resource->nr_samples > 1) {
+      struct pipe_resource tmp_resource;
+      virgl_init_temp_resource_from_box(&tmp_resource, resource, box,
+                                        level, 0);
+
+      trans->resolve_tmp = (struct virgl_resource *)ctx->screen->resource_create(ctx->screen, &tmp_resource);
+      if (!trans->resolve_tmp)
+         goto fail;
+
+      virgl_copy_region_with_blit(ctx, &trans->resolve_tmp->u.b, 0, 0, 0, 0, resource, level, box);
+      ctx->flush(ctx, NULL, 0);
+      /* we want to do a resolve blit into the temporary */
+      hw_res = trans->resolve_tmp->hw_res;
+      offset = 0;
+   } else {
+      offset = vrend_get_tex_image_offset(vtex, level, box->z);
+
+      offset += box->y / util_format_get_blockheight(format) * trans->base.stride +
+         box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+      hw_res = vtex->base.hw_res;
+   }
+
+   readback = virgl_res_needs_readback(vctx, &vtex->base, usage);
+   if (readback)
+      vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level);
+
+   if (doflushwait || readback)
+      vs->vws->resource_wait(vs->vws, vtex->base.hw_res);
+
+   ptr = vs->vws->resource_map(vs->vws, hw_res);
+   if (!ptr)
+      goto fail;
+
+   trans->offset = offset;
+   *transfer = &trans->base;
+
+   /* Arithmetic on void * is a GNU extension; go through char *. */
+   return (char *)ptr + trans->offset;
+
+fail:
+   /* Release everything acquired above so a failed map does not leak. */
+   if (trans->resolve_tmp)
+      pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL);
+   util_slab_free(&vctx->texture_transfer_pool, trans);
+   return NULL;
+}
+
+/**
+ * Finish a texture transfer.
+ *
+ * For write transfers without PIPE_TRANSFER_FLUSH_EXPLICIT the mapped box is
+ * pushed to the host with transfer_put.  The resolve temporary (if any) and
+ * the transfer object are released.
+ */
+static void virgl_texture_transfer_unmap(struct pipe_context *ctx,
+                                         struct pipe_transfer *transfer)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_transfer *trans = virgl_transfer(transfer);
+   struct virgl_texture *vtex = virgl_texture(transfer->resource);
+   uint32_t l_stride;
+
+   if (transfer->resource->target != PIPE_TEXTURE_3D &&
+       transfer->resource->target != PIPE_TEXTURE_CUBE &&
+       transfer->resource->target != PIPE_TEXTURE_1D_ARRAY &&
+       transfer->resource->target != PIPE_TEXTURE_2D_ARRAY &&
+       transfer->resource->target != PIPE_TEXTURE_CUBE_ARRAY)
+      l_stride = 0;
+   else
+      l_stride = trans->base.layer_stride;
+
+   if (trans->base.usage & PIPE_TRANSFER_WRITE) {
+      if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         struct virgl_screen *vs = virgl_screen(ctx->screen);
+         vtex->base.clean = FALSE;
+         vctx->num_transfers++;
+         vs->vws->transfer_put(vs->vws, vtex->base.hw_res,
+                               &transfer->box, trans->base.stride, l_stride, trans->offset, transfer->level);
+
+      }
+   }
+
+   if (trans->resolve_tmp)
+      pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL);
+
+   util_slab_free(&vctx->texture_transfer_pool, trans);
+}
+
+
+/**
+ * Compute the stride and byte offset of every mip level of \p res and the
+ * total backing-store size.
+ *
+ * \param total_size  receives the summed size, or 0 for multisampled
+ *                    resources
+ * \return always TRUE
+ */
+static boolean
+vrend_resource_layout(struct virgl_texture *res,
+                      uint32_t *total_size)
+{
+   struct pipe_resource *pt = &res->base.u.b;
+   unsigned level;
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
+   unsigned buffer_size = 0;
+
+   for (level = 0; level <= pt->last_level; level++) {
+      unsigned slices;
+
+      if (pt->target == PIPE_TEXTURE_CUBE)
+         slices = 6;
+      else if (pt->target == PIPE_TEXTURE_3D)
+         slices = depth;
+      else
+         slices = pt->array_size;
+
+      res->stride[level] = util_format_get_stride(pt->format, width);
+      res->level_offset[level] = buffer_size;
+
+      buffer_size += (util_format_get_nblocksy(pt->format, height) *
+                      slices * res->stride[level]);
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+   }
+
+   if (pt->nr_samples <= 1)
+      *total_size = buffer_size;
+   else /* don't create guest backing store for MSAA */
+      *total_size = 0;
+   return TRUE;
+}
+
+/** Export the texture through the winsys, passing the level-0 stride. */
+static boolean virgl_texture_get_handle(struct pipe_screen *screen,
+                                         struct pipe_resource *ptex,
+                                         struct winsys_handle *whandle)
+{
+   struct virgl_screen *vs = virgl_screen(screen);
+   struct virgl_texture *vtex = virgl_texture(ptex);
+
+   return vs->vws->resource_get_handle(vs->vws, vtex->base.hw_res, vtex->stride[0], whandle);
+}
+
+/** Drop the winsys resource and free the texture. */
+static void virgl_texture_destroy(struct pipe_screen *screen,
+                                  struct pipe_resource *res)
+{
+   struct virgl_screen *vs = virgl_screen(screen);
+   struct virgl_texture *vtex = virgl_texture(res);
+   vs->vws->resource_unref(vs->vws, vtex->base.hw_res);
+   FREE(vtex);
+}
+
+static const struct u_resource_vtbl virgl_texture_vtbl =
+{
+   virgl_texture_get_handle,            /* get_handle */
+   virgl_texture_destroy,               /* resource_destroy */
+   virgl_texture_transfer_map,          /* transfer_map */
+   NULL,                                /* transfer_flush_region */
+   virgl_texture_transfer_unmap,        /* transfer_unmap */
+   NULL                                 /* transfer_inline_write */
+};
+
+/**
+ * Wrap an imported winsys handle in a virgl texture described by
+ * \p template.
+ *
+ * \return the new resource, or NULL when allocation or the winsys import
+ *         fails
+ */
+struct pipe_resource *
+virgl_texture_from_handle(struct virgl_screen *vs,
+                          const struct pipe_resource *template,
+                          struct winsys_handle *whandle)
+{
+   struct virgl_texture *tex;
+   uint32_t size;
+
+   tex = CALLOC_STRUCT(virgl_texture);
+   if (!tex)
+      return NULL;
+   tex->base.u.b = *template;
+   tex->base.u.b.screen = &vs->base;
+   pipe_reference_init(&tex->base.u.b.reference, 1);
+   tex->base.u.vtbl = &virgl_texture_vtbl;
+   vrend_resource_layout(tex, &size);
+
+   tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle);
+   /* Same failure handling as virgl_texture_create(). */
+   if (!tex->base.hw_res) {
+      FREE(tex);
+      return NULL;
+   }
+   return &tex->base.u.b;
+}
+
+/**
+ * Create a texture described by \p template, backed by a new winsys
+ * resource.  The texture starts out marked clean.
+ *
+ * \return the new resource, or NULL when allocation or the winsys resource
+ *         creation fails
+ */
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+                                           const struct pipe_resource *template)
+{
+   struct virgl_texture *tex;
+   uint32_t size;
+   unsigned vbind;
+
+   tex = CALLOC_STRUCT(virgl_texture);
+   if (!tex)
+      return NULL;
+   tex->base.clean = TRUE;
+   tex->base.u.b = *template;
+   tex->base.u.b.screen = &vs->base;
+   pipe_reference_init(&tex->base.u.b.reference, 1);
+   tex->base.u.vtbl = &virgl_texture_vtbl;
+   vrend_resource_layout(tex, &size);
+
+   vbind = pipe_to_virgl_bind(template->bind);
+   tex->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, template->height0, template->depth0, template->array_size, template->last_level, template->nr_samples, size);
+   if (!tex->base.hw_res) {
+      FREE(tex);
+      return NULL;
+   }
+   return &tex->base.u.b;
+}
diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c
new file mode 100644
index 00000000000..641b0b3e3b5
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* the virgl hw tgsi vs what the current gallium want will diverge over time.
+   so add a transform stage to remove things we don't want to send unless
+   the receiver supports it.
+*/
+#include "tgsi/tgsi_transform.h"
+#include "virgl_context.h"
+struct virgl_transform_context {
+   struct tgsi_transform_context base;
+};
+
+/* Property hook for the transform pass: the clip-distance and cull-distance
+   count properties are dropped, every other property is emitted as is. */
+static void
+virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
+                              struct tgsi_full_property *prop)
+{
+   const unsigned name = prop->Property.PropertyName;
+
+   if (name == TGSI_PROPERTY_NUM_CLIPDIST_ENABLED ||
+       name == TGSI_PROPERTY_NUM_CULLDIST_ENABLED)
+      return;
+
+   ctx->emit_property(ctx, prop);
+}
+
+/* Run tokens_in through the virgl property filter.  Returns a token array
+   obtained from tgsi_alloc_tokens() with room for as many tokens as the
+   input, or NULL when that allocation fails. */
+struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
+{
+   const uint num_tokens = tgsi_num_tokens(tokens_in);
+   struct tgsi_token *out_tokens = tgsi_alloc_tokens(num_tokens);
+   struct virgl_transform_context tctx;
+
+   if (!out_tokens)
+      return NULL;
+
+   memset(&tctx, 0, sizeof(tctx));
+   tctx.base.transform_property = virgl_tgsi_transform_property;
+   tgsi_transform_shader(tokens_in, out_tokens, num_tokens, &tctx.base);
+
+   return out_tokens;
+}
diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h
new file mode 100644
index 00000000000..ea21f2b6712
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_winsys.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_WINSYS_H +#define VIRGL_WINSYS_H + +#include "pipe/p_defines.h" +#include "virgl_hw.h" + +struct pipe_box; +struct pipe_fence_handle; +struct winsys_handle; +struct virgl_hw_res; + +#define VIRGL_MAX_CMDBUF_DWORDS (16*1024) + +struct virgl_drm_caps { + union virgl_caps caps; +}; + +struct virgl_cmd_buf { + unsigned cdw; + uint32_t *buf; +}; + +struct virgl_winsys { + unsigned pci_id; + + void (*destroy)(struct virgl_winsys *vws); + + int (*transfer_put)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level); + + int (*transfer_get)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level); + + struct virgl_hw_res *(*resource_create)(struct virgl_winsys *vws, + enum pipe_texture_target target, + uint32_t format, uint32_t bind, + uint32_t width, uint32_t height, + uint32_t depth, uint32_t array_size, + uint32_t last_level, uint32_t nr_samples, + uint32_t size); + + void (*resource_unref)(struct virgl_winsys *vws, struct virgl_hw_res *res); + + void *(*resource_map)(struct virgl_winsys *vws, struct virgl_hw_res *res); + void (*resource_wait)(struct virgl_winsys *vws, struct virgl_hw_res *res); + + struct virgl_hw_res *(*resource_create_from_handle)(struct virgl_winsys *vws, + struct winsys_handle *whandle); + boolean (*resource_get_handle)(struct 
virgl_winsys *vws, + struct virgl_hw_res *res, + uint32_t stride, + struct winsys_handle *whandle); + + struct virgl_cmd_buf *(*cmd_buf_create)(struct virgl_winsys *ws); + void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf); + + void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer); + int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf); + + boolean (*res_is_referenced)(struct virgl_winsys *vws, + struct virgl_cmd_buf *buf, + struct virgl_hw_res *res); + + int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps); + + /* fence */ + struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws); + bool (*fence_wait)(struct virgl_winsys *vws, + struct pipe_fence_handle *fence, + uint64_t timeout); + + void (*fence_reference)(struct virgl_winsys *vws, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src); + + /* for sw paths */ + void (*flush_frontbuffer)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + unsigned level, unsigned layer, + void *winsys_drawable_handle, + struct pipe_box *sub_box); +}; + + +#endif diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 1ad545aae09..b15c8809c1d 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -633,6 +633,7 @@ enum pipe_cap PIPE_CAP_TGSI_TXQS, PIPE_CAP_FORCE_PERSAMPLE_INTERP, PIPE_CAP_SHAREABLE_SHADERS, + PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index b2646d44c74..5f0690e5ae6 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -444,7 +444,8 @@ enum pipe_video_chroma_format PIPE_VIDEO_CHROMA_FORMAT_400, PIPE_VIDEO_CHROMA_FORMAT_420, PIPE_VIDEO_CHROMA_FORMAT_422, - PIPE_VIDEO_CHROMA_FORMAT_444 + 
PIPE_VIDEO_CHROMA_FORMAT_444, + PIPE_VIDEO_CHROMA_FORMAT_NONE }; #ifdef __cplusplus diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index a22fb938dbb..f868d71db23 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -169,6 +169,10 @@ struct pipe_screen { struct pipe_resource * (*resource_create)(struct pipe_screen *, const struct pipe_resource *templat); + struct pipe_resource * (*resource_create_front)(struct pipe_screen *, + const struct pipe_resource *templat, + const void *map_front_private); + /** * Create a texture from a winsys_handle. The handle is often created in * another process by first creating a pipe texture and then calling diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 7d13151e643..d353be60759 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -479,6 +479,8 @@ struct pipe_h265_picture_desc uint8_t RefPicSetStCurrBefore[8]; uint8_t RefPicSetStCurrAfter[8]; uint8_t RefPicSetLtCurr[8]; + uint8_t RefPicList[2][15]; + bool UseRefPicList; }; #ifdef __cplusplus diff --git a/src/gallium/include/state_tracker/drisw_api.h b/src/gallium/include/state_tracker/drisw_api.h index 328440cf5ff..cd5a27e2482 100644 --- a/src/gallium/include/state_tracker/drisw_api.h +++ b/src/gallium/include/state_tracker/drisw_api.h @@ -11,6 +11,9 @@ struct dri_drawable; */ struct drisw_loader_funcs { + void (*get_image) (struct dri_drawable *dri_drawable, + int x, int y, unsigned width, unsigned height, unsigned stride, + void *data); void (*put_image) (struct dri_drawable *dri_drawable, void *data, unsigned width, unsigned height); void (*put_image2) (struct dri_drawable *dri_drawable, diff --git a/src/gallium/include/state_tracker/sw_winsys.h b/src/gallium/include/state_tracker/sw_winsys.h index a3479eb0bc3..0b792cd0ce4 100644 --- a/src/gallium/include/state_tracker/sw_winsys.h +++ 
b/src/gallium/include/state_tracker/sw_winsys.h @@ -90,6 +90,7 @@ struct sw_winsys enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride ); /** diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index d74b50df45a..3b37f0802b0 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -247,8 +247,12 @@ namespace { // attribute. This attribute will prevent Clang from creating // illegal uses of barrier() (e.g. Moving barrier() inside a conditional // that is no executed by all threads) during its optimizaton passes. +#if HAVE_LLVM >= 0x0308 + c.getCodeGenOpts().LinkBitcodeFiles.emplace_back(llvm::Linker::Flags::None, + libclc_path); +#else c.getCodeGenOpts().LinkBitcodeFile = libclc_path; - +#endif optimization_level = c.getCodeGenOpts().OptimizationLevel; // Compile the code diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c index 4ec6992643a..753c59d696a 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -95,6 +95,21 @@ get_image(__DRIdrawable *dPriv, int x, int y, int width, int height, void *data) data, dPriv->loaderPrivate); } +static inline void +get_image2(__DRIdrawable *dPriv, int x, int y, int width, int height, int stride, void *data) +{ + __DRIscreen *sPriv = dPriv->driScreenPriv; + const __DRIswrastLoaderExtension *loader = sPriv->swrast_loader; + + /* getImage2 support is only in version 3 or newer */ + if (loader->base.version < 3) + return; + + loader->getImage2(dPriv, + x, y, width, height, stride, + data, dPriv->loaderPrivate); +} + static void drisw_update_drawable_info(struct dri_drawable *drawable) { @@ -105,6 +120,18 @@ drisw_update_drawable_info(struct dri_drawable *drawable) } static void +drisw_get_image(struct dri_drawable *drawable, + int x, 
int y, unsigned width, unsigned height, unsigned stride, + void *data) +{ + __DRIdrawable *dPriv = drawable->dPriv; + int draw_x, draw_y, draw_w, draw_h; + + get_drawable_info(dPriv, &draw_x, &draw_y, &draw_w, &draw_h); + get_image2(dPriv, x, y, draw_w, draw_h, stride, data); +} + +static void drisw_put_image(struct dri_drawable *drawable, void *data, unsigned width, unsigned height) { @@ -236,6 +263,7 @@ drisw_allocate_textures(struct dri_context *stctx, unsigned count) { struct dri_screen *screen = dri_screen(drawable->sPriv); + const __DRIswrastLoaderExtension *loader = drawable->dPriv->driScreenPriv->swrast_loader; struct pipe_resource templ; unsigned width, height; boolean resized; @@ -281,8 +309,14 @@ drisw_allocate_textures(struct dri_context *stctx, templ.format = format; templ.bind = bind; - drawable->textures[statts[i]] = - screen->base.screen->resource_create(screen->base.screen, &templ); + if (statts[i] == ST_ATTACHMENT_FRONT_LEFT && + screen->base.screen->resource_create_front && + loader->base.version >= 3) { + drawable->textures[statts[i]] = + screen->base.screen->resource_create_front(screen->base.screen, &templ, (const void *)drawable); + } else + drawable->textures[statts[i]] = + screen->base.screen->resource_create(screen->base.screen, &templ); } drawable->old_w = width; @@ -338,6 +372,7 @@ static const __DRIextension *drisw_screen_extensions[] = { }; static struct drisw_loader_funcs drisw_lf = { + .get_image = drisw_get_image, .put_image = drisw_put_image, .put_image2 = drisw_put_image2 }; diff --git a/src/gallium/state_trackers/va/Makefile.am b/src/gallium/state_trackers/va/Makefile.am index 2a93a904346..348cfe17759 100644 --- a/src/gallium/state_trackers/va/Makefile.am +++ b/src/gallium/state_trackers/va/Makefile.am @@ -30,6 +30,15 @@ AM_CFLAGS = \ $(VA_CFLAGS) \ -DVA_DRIVER_INIT_FUNC="__vaDriverInit_$(VA_MAJOR)_$(VA_MINOR)" +AM_CFLAGS += \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ + -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" + +if 
HAVE_GALLIUM_STATIC_TARGETS +AM_CFLAGS += \ + -DGALLIUM_STATIC_TARGETS=1 +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include diff --git a/src/gallium/state_trackers/va/buffer.c b/src/gallium/state_trackers/va/buffer.c index 8f9ba440a75..71a65037757 100644 --- a/src/gallium/state_trackers/va/buffer.c +++ b/src/gallium/state_trackers/va/buffer.c @@ -26,8 +26,12 @@ * **************************************************************************/ +#include "pipe/p_screen.h" +#include "state_tracker/drm_driver.h" #include "util/u_memory.h" #include "util/u_handle_table.h" +#include "util/u_transfer.h" +#include "vl/vl_winsys.h" #include "va_private.h" @@ -73,6 +77,12 @@ vlVaBufferSetNumElements(VADriverContextP ctx, VABufferID buf_id, return VA_STATUS_ERROR_INVALID_CONTEXT; buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id); + if (!buf) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (buf->derived_surface.resource) + return VA_STATUS_ERROR_INVALID_BUFFER; + buf->data = REALLOC(buf->data, buf->size * buf->num_elements, buf->size * num_elements); buf->num_elements = num_elements; @@ -86,16 +96,37 @@ vlVaBufferSetNumElements(VADriverContextP ctx, VABufferID buf_id, VAStatus vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, void **pbuff) { + vlVaDriver *drv; vlVaBuffer *buf; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id); + drv = VL_VA_DRIVER(ctx); + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!pbuff) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + buf = handle_table_get(drv->htab, buf_id); if (!buf) return VA_STATUS_ERROR_INVALID_BUFFER; - *pbuff = buf->data; + if (buf->export_refcount > 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (buf->derived_surface.resource) { + *pbuff = pipe_buffer_map(drv->pipe, buf->derived_surface.resource, + PIPE_TRANSFER_WRITE, + &buf->derived_surface.transfer); + + if (!buf->derived_surface.transfer || !*pbuff) + return VA_STATUS_ERROR_INVALID_BUFFER; + + } 
else { + *pbuff = buf->data; + } return VA_STATUS_SUCCESS; } @@ -103,16 +134,30 @@ vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, void **pbuff) VAStatus vlVaUnmapBuffer(VADriverContextP ctx, VABufferID buf_id) { + vlVaDriver *drv; vlVaBuffer *buf; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id); + drv = VL_VA_DRIVER(ctx); + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + buf = handle_table_get(drv->htab, buf_id); if (!buf) return VA_STATUS_ERROR_INVALID_BUFFER; - /* Nothing to do here */ + if (buf->export_refcount > 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (buf->derived_surface.resource) { + if (!buf->derived_surface.transfer) + return VA_STATUS_ERROR_INVALID_BUFFER; + + pipe_buffer_unmap(drv->pipe, buf->derived_surface.transfer); + buf->derived_surface.transfer = NULL; + } return VA_STATUS_SUCCESS; } @@ -129,6 +174,13 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buf_id) if (!buf) return VA_STATUS_ERROR_INVALID_BUFFER; + if (buf->derived_surface.resource) { + if (buf->export_refcount > 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + pipe_resource_reference(&buf->derived_surface.resource, NULL); + } + FREE(buf->data); FREE(buf); handle_table_remove(VL_VA_DRIVER(ctx)->htab, buf_id); @@ -155,3 +207,126 @@ vlVaBufferInfo(VADriverContextP ctx, VABufferID buf_id, VABufferType *type, return VA_STATUS_SUCCESS; } + +VAStatus +vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id, + VABufferInfo *out_buf_info) +{ + uint32_t i; + uint32_t mem_type; + vlVaBuffer *buf ; + struct pipe_screen *screen; + + /* List of supported memory types, in preferred order. 
*/ + static const uint32_t mem_types[] = { + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME, + 0 + }; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id); + + if (!buf) + return VA_STATUS_ERROR_INVALID_BUFFER; + + /* Only VA surface|image like buffers are supported for now .*/ + if (buf->type != VAImageBufferType) + return VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE; + + if (!out_buf_info) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (!out_buf_info->mem_type) + mem_type = mem_types[0]; + else { + mem_type = 0; + for (i = 0; mem_types[i] != 0; i++) { + if (out_buf_info->mem_type & mem_types[i]) { + mem_type = out_buf_info->mem_type; + break; + } + } + if (!mem_type) + return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; + } + + if (!buf->derived_surface.resource) + return VA_STATUS_ERROR_INVALID_BUFFER; + + screen = VL_VA_PSCREEN(ctx); + + if (buf->derived_surface.fence) { + screen->fence_finish(screen, buf->derived_surface.fence, PIPE_TIMEOUT_INFINITE); + screen->fence_reference(screen, &buf->derived_surface.fence, NULL); + } + + if (buf->export_refcount > 0) { + if (buf->export_state.mem_type != mem_type) + return VA_STATUS_ERROR_INVALID_PARAMETER; + } else { + VABufferInfo * const buf_info = &buf->export_state; + + switch (mem_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: { + struct winsys_handle whandle; + + memset(&whandle, 0, sizeof(whandle)); + whandle.type = DRM_API_HANDLE_TYPE_FD; + + if (!screen->resource_get_handle(screen, buf->derived_surface.resource, &whandle)) + return VA_STATUS_ERROR_INVALID_BUFFER; + + buf_info->handle = (intptr_t)whandle.handle; + break; + default: + return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; + } + } + + buf_info->type = buf->type; + buf_info->mem_type = mem_type; + buf_info->mem_size = buf->num_elements * buf->size; + + } + + buf->export_refcount++; + + *out_buf_info = buf->export_state; + + return VA_STATUS_SUCCESS; +} + +VAStatus 
+vlVaReleaseBufferHandle(VADriverContextP ctx, VABufferID buf_id) +{ + vlVaBuffer *buf; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id); + + if (!buf) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (buf->export_refcount == 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (--buf->export_refcount == 0) { + VABufferInfo * const buf_info = &buf->export_state; + + switch (buf_info->mem_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: + close((intptr_t)buf_info->handle); + break; + default: + return VA_STATUS_ERROR_INVALID_BUFFER; + } + + buf_info->mem_type = 0; + } + + return VA_STATUS_SUCCESS; +} diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c index cfb0b25b71f..0f47aacdbd6 100644 --- a/src/gallium/state_trackers/va/config.c +++ b/src/gallium/state_trackers/va/config.c @@ -45,13 +45,16 @@ vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, int *num_ *num_profiles = 0; pscreen = VL_VA_PSCREEN(ctx); - for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; ++p) + for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_HEVC_MAIN_444; ++p) if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) { vap = PipeToProfile(p); if (vap != VAProfileNone) profile_list[(*num_profiles)++] = vap; } + /* Support postprocessing through vl_compositor */ + profile_list[(*num_profiles)++] = VAProfileNone; + return VA_STATUS_SUCCESS; } @@ -67,6 +70,11 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile, *num_entrypoints = 0; + if (profile == VAProfileNone) { + entrypoint_list[(*num_entrypoints)++] = VAEntrypointVideoProc; + return VA_STATUS_SUCCESS; + } + p = ProfileToPipe(profile); if (p == PIPE_VIDEO_PROFILE_UNKNOWN) return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; @@ -118,6 +126,11 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint 
entrypoin if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; + if (profile == VAProfileNone && entrypoint == VAEntrypointVideoProc) { + *config_id = PIPE_VIDEO_PROFILE_UNKNOWN; + return VA_STATUS_SUCCESS; + } + p = ProfileToPipe(profile); if (p == PIPE_VIDEO_PROFILE_UNKNOWN) return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; @@ -151,6 +164,13 @@ vlVaQueryConfigAttributes(VADriverContextP ctx, VAConfigID config_id, VAProfile return VA_STATUS_ERROR_INVALID_CONTEXT; *profile = PipeToProfile(config_id); + + if (config_id == PIPE_VIDEO_PROFILE_UNKNOWN) { + *entrypoint = VAEntrypointVideoProc; + *num_attribs = 0; + return VA_STATUS_SUCCESS; + } + *entrypoint = VAEntrypointVLD; *num_attribs = 1; diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 8b003aedaec..ec9e0488d85 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -28,7 +28,8 @@ #include "pipe/p_screen.h" #include "pipe/p_video_codec.h" - +#include "pipe-loader/pipe_loader.h" +#include "state_tracker/drm_driver.h" #include "util/u_memory.h" #include "util/u_handle_table.h" #include "util/u_video.h" @@ -36,6 +37,8 @@ #include "va_private.h" +#include <va/va_drmcommon.h> + static struct VADriverVTable vtable = { &vlVaTerminate, @@ -81,13 +84,28 @@ static struct VADriverVTable vtable = &vlVaSetDisplayAttributes, &vlVaBufferInfo, &vlVaLockSurface, - &vlVaUnlockSurface + &vlVaUnlockSurface, + NULL, /* DEPRECATED VaGetSurfaceAttributes */ + &vlVaCreateSurfaces2, + &vlVaQuerySurfaceAttributes, + &vlVaAcquireBufferHandle, + &vlVaReleaseBufferHandle +}; + +static struct VADriverVTableVPP vtable_vpp = +{ + 1, + &vlVaQueryVideoProcFilters, + &vlVaQueryVideoProcFilterCaps, + &vlVaQueryVideoProcPipelineCaps }; PUBLIC VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP ctx) { vlVaDriver *drv; + int drm_fd; + struct drm_state *drm_info; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -96,9 +114,56 @@ VA_DRIVER_INIT_FUNC(VADriverContextP 
ctx) if (!drv) return VA_STATUS_ERROR_ALLOCATION_FAILED; - drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen); - if (!drv->vscreen) - goto error_screen; + switch (ctx->display_type) { + case VA_DISPLAY_ANDROID: + case VA_DISPLAY_WAYLAND: + FREE(drv); + return VA_STATUS_ERROR_UNIMPLEMENTED; + case VA_DISPLAY_GLX: + case VA_DISPLAY_X11: + drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen); + if (!drv->vscreen) + goto error_screen; + break; + case VA_DISPLAY_DRM: + case VA_DISPLAY_DRM_RENDERNODES: { + drm_info = (struct drm_state *) ctx->drm_state; + if (!drm_info) { + FREE(drv); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + +#if GALLIUM_STATIC_TARGETS + drm_fd = drm_info->fd; +#else + drm_fd = dup(drm_info->fd); +#endif + + if (drm_fd < 0) { + FREE(drv); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + drv->vscreen = CALLOC_STRUCT(vl_screen); + if (!drv->vscreen) + goto error_screen; + +#if GALLIUM_STATIC_TARGETS + drv->vscreen->pscreen = dd_create_screen(drm_fd); +#else + if (pipe_loader_drm_probe_fd(&drv->dev, drm_fd)) + drv->vscreen->pscreen = pipe_loader_create_screen(drv->dev, PIPE_SEARCH_DIR); +#endif + + if (!drv->vscreen->pscreen) + goto error_pipe; + + } + break; + default: + FREE(drv); + return VA_STATUS_ERROR_INVALID_DISPLAY; + } drv->pipe = drv->vscreen->pscreen->context_create(drv->vscreen->pscreen, drv->vscreen, 0); @@ -119,6 +184,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) ctx->version_major = 0; ctx->version_minor = 1; *ctx->vtable = vtable; + *ctx->vtable_vpp = vtable_vpp; ctx->max_profiles = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH - PIPE_VIDEO_PROFILE_UNKNOWN; ctx->max_entrypoints = 1; ctx->max_attributes = 1; @@ -133,7 +199,10 @@ error_htab: drv->pipe->destroy(drv->pipe); error_pipe: - vl_screen_destroy(drv->vscreen); + if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) + vl_screen_destroy(drv->vscreen); + else + FREE(drv->vscreen); error_screen: FREE(drv); @@ -148,11 +217,15 @@ 
vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, struct pipe_video_codec templat = {}; vlVaDriver *drv; vlVaContext *context; + int is_vpp; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - if (!(picture_width && picture_height)) + is_vpp = config_id == PIPE_VIDEO_PROFILE_UNKNOWN && !picture_width && + !picture_height && !flag && !render_targets && !num_render_targets; + + if (!(picture_width && picture_height) && !is_vpp) return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; drv = VL_VA_DRIVER(ctx); @@ -160,38 +233,61 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, if (!context) return VA_STATUS_ERROR_ALLOCATION_FAILED; - templat.profile = config_id; - templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; - templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; - templat.width = picture_width; - templat.height = picture_height; - templat.max_references = num_render_targets; - templat.expect_chunked_decode = true; - - if (u_reduce_video_profile(templat.profile) == - PIPE_VIDEO_FORMAT_MPEG4_AVC) - templat.level = u_get_h264_level(templat.width, templat.height, - &templat.max_references); - - context->decoder = drv->pipe->create_video_codec(drv->pipe, &templat); - if (!context->decoder) { - FREE(context); - return VA_STATUS_ERROR_ALLOCATION_FAILED; - } - - if (u_reduce_video_profile(context->decoder->profile) == - PIPE_VIDEO_FORMAT_MPEG4_AVC) { - context->desc.h264.pps = CALLOC_STRUCT(pipe_h264_pps); - if (!context->desc.h264.pps) { + if (is_vpp) { + context->decoder = NULL; + if (!drv->compositor.upload) { FREE(context); - return VA_STATUS_ERROR_ALLOCATION_FAILED; + return VA_STATUS_ERROR_INVALID_CONTEXT; } - context->desc.h264.pps->sps = CALLOC_STRUCT(pipe_h264_sps); - if (!context->desc.h264.pps->sps) { - FREE(context->desc.h264.pps); + } else { + templat.profile = config_id; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = 
picture_width; + templat.height = picture_height; + templat.max_references = num_render_targets; + templat.expect_chunked_decode = true; + + if (u_reduce_video_profile(templat.profile) == + PIPE_VIDEO_FORMAT_MPEG4_AVC) + templat.level = u_get_h264_level(templat.width, templat.height, + &templat.max_references); + + context->decoder = drv->pipe->create_video_codec(drv->pipe, &templat); + if (!context->decoder) { FREE(context); return VA_STATUS_ERROR_ALLOCATION_FAILED; } + + if (u_reduce_video_profile(context->decoder->profile) == + PIPE_VIDEO_FORMAT_MPEG4_AVC) { + context->desc.h264.pps = CALLOC_STRUCT(pipe_h264_pps); + if (!context->desc.h264.pps) { + FREE(context); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + context->desc.h264.pps->sps = CALLOC_STRUCT(pipe_h264_sps); + if (!context->desc.h264.pps->sps) { + FREE(context->desc.h264.pps); + FREE(context); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + } + + if (u_reduce_video_profile(context->decoder->profile) == + PIPE_VIDEO_FORMAT_HEVC) { + context->desc.h265.pps = CALLOC_STRUCT(pipe_h265_pps); + if (!context->desc.h265.pps) { + FREE(context); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + context->desc.h265.pps->sps = CALLOC_STRUCT(pipe_h265_sps); + if (!context->desc.h265.pps->sps) { + FREE(context->desc.h265.pps); + FREE(context); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + } } context->desc.base.profile = config_id; @@ -211,12 +307,20 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id) drv = VL_VA_DRIVER(ctx); context = handle_table_get(drv->htab, context_id); - if (u_reduce_video_profile(context->decoder->profile) == - PIPE_VIDEO_FORMAT_MPEG4_AVC) { - FREE(context->desc.h264.pps->sps); - FREE(context->desc.h264.pps); + + if (context->decoder) { + if (u_reduce_video_profile(context->decoder->profile) == + PIPE_VIDEO_FORMAT_MPEG4_AVC) { + FREE(context->desc.h264.pps->sps); + FREE(context->desc.h264.pps); + } + if (u_reduce_video_profile(context->decoder->profile) == + 
PIPE_VIDEO_FORMAT_HEVC) { + FREE(context->desc.h265.pps->sps); + FREE(context->desc.h265.pps); + } + context->decoder->destroy(context->decoder); } - context->decoder->destroy(context->decoder); FREE(context); handle_table_remove(drv->htab, context_id); @@ -235,7 +339,10 @@ vlVaTerminate(VADriverContextP ctx) vl_compositor_cleanup_state(&drv->cstate); vl_compositor_cleanup(&drv->compositor); drv->pipe->destroy(drv->pipe); - vl_screen_destroy(drv->vscreen); + if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) + vl_screen_destroy(drv->vscreen); + else + FREE(drv->vscreen); handle_table_destroy(drv->htab); FREE(drv); diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c index b37a9714437..c6d0c5abf65 100644 --- a/src/gallium/state_trackers/va/image.c +++ b/src/gallium/state_trackers/va/image.c @@ -37,14 +37,21 @@ #include "va_private.h" -static const VAImageFormat formats[VL_VA_MAX_IMAGE_FORMATS] = +static const VAImageFormat formats[] = { {VA_FOURCC('N','V','1','2')}, {VA_FOURCC('I','4','2','0')}, {VA_FOURCC('Y','V','1','2')}, {VA_FOURCC('Y','U','Y','V')}, {VA_FOURCC('U','Y','V','Y')}, - {VA_FOURCC('B','G','R','A')} + {.fourcc = VA_FOURCC('B','G','R','A'), .byte_order = VA_LSB_FIRST, 32, 32, + 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}, + {.fourcc = VA_FOURCC('R','G','B','A'), .byte_order = VA_LSB_FIRST, 32, 32, + 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}, + {.fourcc = VA_FOURCC('B','G','R','X'), .byte_order = VA_LSB_FIRST, 32, 24, + 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}, + {.fourcc = VA_FOURCC('R','G','B','X'), .byte_order = VA_LSB_FIRST, 32, 24, + 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000} }; static void @@ -72,6 +79,8 @@ vlVaQueryImageFormats(VADriverContextP ctx, VAImageFormat *format_list, int *num enum pipe_format format; int i; + STATIC_ASSERT(ARRAY_SIZE(formats) == VL_VA_MAX_IMAGE_FORMATS); + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -80,8 +89,8 @@ 
vlVaQueryImageFormats(VADriverContextP ctx, VAImageFormat *format_list, int *num *num_formats = 0; pscreen = VL_VA_PSCREEN(ctx); - for (i = 0; i < VL_VA_MAX_IMAGE_FORMATS; ++i) { - format = YCbCrToPipe(formats[i].fourcc); + for (i = 0; i < ARRAY_SIZE(formats); ++i) { + format = VaFourccToPipeFormat(formats[i].fourcc); if (pscreen->is_video_format_supported(pscreen, format, PIPE_VIDEO_PROFILE_UNKNOWN, PIPE_VIDEO_ENTRYPOINT_BITSTREAM)) @@ -149,6 +158,9 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig break; case VA_FOURCC('B','G','R','A'): + case VA_FOURCC('R','G','B','A'): + case VA_FOURCC('B','G','R','X'): + case VA_FOURCC('R','G','B','X'): img->num_planes = 1; img->pitches[0] = w * 4; img->offsets[0] = 0; @@ -172,10 +184,97 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig VAStatus vlVaDeriveImage(VADriverContextP ctx, VASurfaceID surface, VAImage *image) { + vlVaDriver *drv; + vlVaSurface *surf; + vlVaBuffer *img_buf; + VAImage *img; + struct pipe_surface **surfaces; + int w; + int h; + int i; + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; - return VA_STATUS_ERROR_UNIMPLEMENTED; + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + surf = handle_table_get(drv->htab, surface); + + if (!surf || !surf->buffer || surf->buffer->interlaced) + return VA_STATUS_ERROR_INVALID_SURFACE; + + surfaces = surf->buffer->get_surfaces(surf->buffer); + if (!surfaces || !surfaces[0]->texture) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + img = CALLOC(1, sizeof(VAImage)); + if (!img) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + img->format.fourcc = PipeFormatToVaFourcc(surf->buffer->buffer_format); + img->buf = VA_INVALID_ID; + img->width = surf->buffer->width; + img->height = surf->buffer->height; + img->num_palette_entries = 0; + img->entry_bytes = 0; + w = align(surf->buffer->width, 2); + h = align(surf->buffer->height, 2); + + for (i = 0; i < ARRAY_SIZE(formats); ++i) { 
+ if (img->format.fourcc == formats[i].fourcc) { + img->format = formats[i]; + break; + } + } + + switch (img->format.fourcc) { + case VA_FOURCC('U','Y','V','Y'): + case VA_FOURCC('Y','U','Y','V'): + img->num_planes = 1; + img->pitches[0] = w * 2; + img->offsets[0] = 0; + img->data_size = w * h * 2; + break; + + case VA_FOURCC('B','G','R','A'): + case VA_FOURCC('R','G','B','A'): + case VA_FOURCC('B','G','R','X'): + case VA_FOURCC('R','G','B','X'): + img->num_planes = 1; + img->pitches[0] = w * 4; + img->offsets[0] = 0; + img->data_size = w * h * 4; + break; + + default: + /* VaDeriveImage is designed for contiguous planes. */ + FREE(img); + return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; + } + + img_buf = CALLOC(1, sizeof(vlVaBuffer)); + if (!img_buf) { + FREE(img); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + + img->image_id = handle_table_add(drv->htab, img); + + img_buf->type = VAImageBufferType; + img_buf->size = image->data_size; + img_buf->num_elements = 1; + img_buf->derived_surface.fence = surf->fence; + + pipe_resource_reference(&img_buf->derived_surface.resource, surfaces[0]->texture); + + img->buf = handle_table_add(VL_VA_DRIVER(ctx)->htab, img_buf); + + *image = *img; + + return VA_STATUS_SUCCESS; } VAStatus @@ -235,7 +334,7 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y, if (!img_buf) return VA_STATUS_ERROR_INVALID_BUFFER; - format = YCbCrToPipe(vaimage->format.fourcc); + format = VaFourccToPipeFormat(vaimage->format.fourcc); if (format == PIPE_FORMAT_NONE) return VA_STATUS_ERROR_OPERATION_FAILED; @@ -330,17 +429,30 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image, if (!img_buf) return VA_STATUS_ERROR_INVALID_BUFFER; - format = YCbCrToPipe(vaimage->format.fourcc); + if (img_buf->derived_surface.resource) { + /* Attempting to transfer derived image to surface */ + return VA_STATUS_ERROR_UNIMPLEMENTED; + } + + format = VaFourccToPipeFormat(vaimage->format.fourcc); + if (format == PIPE_FORMAT_NONE) 
return VA_STATUS_ERROR_OPERATION_FAILED; - if (surf->buffer == NULL || format != surf->buffer->buffer_format) { - if (surf->buffer) - surf->buffer->destroy(surf->buffer); + if (format != surf->buffer->buffer_format) { + struct pipe_video_buffer *tmp_buf; + enum pipe_format old_surf_format = surf->templat.buffer_format; + surf->templat.buffer_format = format; - surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &surf->templat); - if (!surf->buffer) - return VA_STATUS_ERROR_ALLOCATION_FAILED; + tmp_buf = drv->pipe->create_video_buffer(drv->pipe, &surf->templat); + + if (!tmp_buf) { + surf->templat.buffer_format = old_surf_format; + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + + surf->buffer->destroy(surf->buffer); + surf->buffer = tmp_buf; } views = surf->buffer->get_sampler_view_planes(surf->buffer); diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c index 9b94b397b07..e850689005d 100644 --- a/src/gallium/state_trackers/va/picture.c +++ b/src/gallium/state_trackers/va/picture.c @@ -32,6 +32,7 @@ #include "util/u_video.h" #include "vl/vl_vlc.h" +#include "vl/vl_winsys.h" #include "va_private.h" @@ -58,7 +59,17 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende return VA_STATUS_ERROR_INVALID_SURFACE; context->target = surf->buffer; - context->decoder->begin_frame(context->decoder, context->target, NULL); + + if (!context->decoder) { + /* VPP */ + if ((context->target->buffer_format != PIPE_FORMAT_B8G8R8A8_UNORM && + context->target->buffer_format != PIPE_FORMAT_R8G8B8A8_UNORM) || + context->target->interlaced) + return VA_STATUS_ERROR_UNIMPLEMENTED; + return VA_STATUS_SUCCESS; + } + + context->decoder->begin_frame(context->decoder, context->target, &context->desc.base); return VA_STATUS_SUCCESS; } @@ -81,6 +92,7 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer * VAPictureParameterBufferH264 *h264; VAPictureParameterBufferVC1 * vc1; 
VAPictureParameterBufferMPEG4 *mpeg4; + VAPictureParameterBufferHEVC *hevc; vlVaSurface *surf_forward; vlVaSurface *surf_backward; unsigned int i; @@ -286,6 +298,157 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer * break; + case PIPE_VIDEO_FORMAT_HEVC: + assert(buf->size >= sizeof(VAPictureParameterBufferHEVC) && buf->num_elements == 1); + hevc = buf->data; + context->desc.h265.pps->sps->chroma_format_idc = hevc->pic_fields.bits.chroma_format_idc; + context->desc.h265.pps->sps->separate_colour_plane_flag = + hevc->pic_fields.bits.separate_colour_plane_flag; + context->desc.h265.pps->sps->pic_width_in_luma_samples = hevc->pic_width_in_luma_samples; + context->desc.h265.pps->sps->pic_height_in_luma_samples = hevc->pic_height_in_luma_samples; + context->desc.h265.pps->sps->bit_depth_luma_minus8 = hevc->bit_depth_luma_minus8; + context->desc.h265.pps->sps->bit_depth_chroma_minus8 = hevc->bit_depth_chroma_minus8; + context->desc.h265.pps->sps->log2_max_pic_order_cnt_lsb_minus4 = + hevc->log2_max_pic_order_cnt_lsb_minus4; + context->desc.h265.pps->sps->sps_max_dec_pic_buffering_minus1 = + hevc->sps_max_dec_pic_buffering_minus1; + context->desc.h265.pps->sps->log2_min_luma_coding_block_size_minus3 = + hevc->log2_min_luma_coding_block_size_minus3; + context->desc.h265.pps->sps->log2_diff_max_min_luma_coding_block_size = + hevc->log2_diff_max_min_luma_coding_block_size; + context->desc.h265.pps->sps->log2_min_transform_block_size_minus2 = + hevc->log2_min_transform_block_size_minus2; + context->desc.h265.pps->sps->log2_diff_max_min_transform_block_size = + hevc->log2_diff_max_min_transform_block_size; + context->desc.h265.pps->sps->max_transform_hierarchy_depth_inter = + hevc->max_transform_hierarchy_depth_inter; + context->desc.h265.pps->sps->max_transform_hierarchy_depth_intra = + hevc->max_transform_hierarchy_depth_intra; + context->desc.h265.pps->sps->scaling_list_enabled_flag = + hevc->pic_fields.bits.scaling_list_enabled_flag; + 
context->desc.h265.pps->sps->amp_enabled_flag = hevc->pic_fields.bits.amp_enabled_flag; + context->desc.h265.pps->sps->sample_adaptive_offset_enabled_flag = + hevc->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag; + context->desc.h265.pps->sps->pcm_enabled_flag = hevc->pic_fields.bits.pcm_enabled_flag; + if (hevc->pic_fields.bits.pcm_enabled_flag == 1) { + context->desc.h265.pps->sps->pcm_sample_bit_depth_luma_minus1 = + hevc->pcm_sample_bit_depth_luma_minus1; + context->desc.h265.pps->sps->pcm_sample_bit_depth_chroma_minus1 = + hevc->pcm_sample_bit_depth_chroma_minus1; + context->desc.h265.pps->sps->log2_min_pcm_luma_coding_block_size_minus3 = + hevc->log2_min_pcm_luma_coding_block_size_minus3; + context->desc.h265.pps->sps->log2_diff_max_min_pcm_luma_coding_block_size = + hevc->log2_diff_max_min_pcm_luma_coding_block_size; + context->desc.h265.pps->sps->pcm_loop_filter_disabled_flag = + hevc->pic_fields.bits.pcm_loop_filter_disabled_flag; + } + context->desc.h265.pps->sps->num_short_term_ref_pic_sets = hevc->num_short_term_ref_pic_sets; + context->desc.h265.pps->sps->long_term_ref_pics_present_flag = + hevc->slice_parsing_fields.bits.long_term_ref_pics_present_flag; + context->desc.h265.pps->sps->num_long_term_ref_pics_sps = hevc->num_long_term_ref_pic_sps; + context->desc.h265.pps->sps->sps_temporal_mvp_enabled_flag = + hevc->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag; + context->desc.h265.pps->sps->strong_intra_smoothing_enabled_flag = + hevc->pic_fields.bits.strong_intra_smoothing_enabled_flag; + + context->desc.h265.pps->dependent_slice_segments_enabled_flag = + hevc->slice_parsing_fields.bits.dependent_slice_segments_enabled_flag; + context->desc.h265.pps->output_flag_present_flag = + hevc->slice_parsing_fields.bits.output_flag_present_flag; + context->desc.h265.pps->num_extra_slice_header_bits = hevc->num_extra_slice_header_bits; + context->desc.h265.pps->sign_data_hiding_enabled_flag = + 
hevc->pic_fields.bits.sign_data_hiding_enabled_flag; + context->desc.h265.pps->cabac_init_present_flag = + hevc->slice_parsing_fields.bits.cabac_init_present_flag; + context->desc.h265.pps->num_ref_idx_l0_default_active_minus1 = + hevc->num_ref_idx_l0_default_active_minus1; + context->desc.h265.pps->num_ref_idx_l1_default_active_minus1 = + hevc->num_ref_idx_l1_default_active_minus1; + context->desc.h265.pps->init_qp_minus26 = hevc->init_qp_minus26; + context->desc.h265.pps->constrained_intra_pred_flag = + hevc->pic_fields.bits.constrained_intra_pred_flag; + context->desc.h265.pps->transform_skip_enabled_flag = + hevc->pic_fields.bits.transform_skip_enabled_flag; + context->desc.h265.pps->cu_qp_delta_enabled_flag = + hevc->pic_fields.bits.cu_qp_delta_enabled_flag; + context->desc.h265.pps->diff_cu_qp_delta_depth = hevc->diff_cu_qp_delta_depth; + context->desc.h265.pps->pps_cb_qp_offset = hevc->pps_cb_qp_offset; + context->desc.h265.pps->pps_cr_qp_offset = hevc->pps_cr_qp_offset; + context->desc.h265.pps->pps_slice_chroma_qp_offsets_present_flag = + hevc->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag; + context->desc.h265.pps->weighted_pred_flag = hevc->pic_fields.bits.weighted_pred_flag; + context->desc.h265.pps->weighted_bipred_flag = hevc->pic_fields.bits.weighted_bipred_flag; + context->desc.h265.pps->transquant_bypass_enabled_flag = + hevc->pic_fields.bits.transquant_bypass_enabled_flag; + context->desc.h265.pps->tiles_enabled_flag = hevc->pic_fields.bits.tiles_enabled_flag; + context->desc.h265.pps->entropy_coding_sync_enabled_flag = + hevc->pic_fields.bits.entropy_coding_sync_enabled_flag; + if (hevc->pic_fields.bits.tiles_enabled_flag == 1) { + context->desc.h265.pps->num_tile_columns_minus1 = hevc->num_tile_columns_minus1; + context->desc.h265.pps->num_tile_rows_minus1 = hevc->num_tile_rows_minus1; + for (i = 0 ; i < 19 ; i++) + context->desc.h265.pps->column_width_minus1[i] = hevc->column_width_minus1[i]; + for (i = 0 ; i < 21 ; i++) + 
context->desc.h265.pps->row_height_minus1[i] = hevc->row_height_minus1[i]; + context->desc.h265.pps->loop_filter_across_tiles_enabled_flag = + hevc->pic_fields.bits.loop_filter_across_tiles_enabled_flag; + } + context->desc.h265.pps->pps_loop_filter_across_slices_enabled_flag = + hevc->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag; + context->desc.h265.pps->deblocking_filter_override_enabled_flag = + hevc->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag; + context->desc.h265.pps->pps_deblocking_filter_disabled_flag = + hevc->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag; + context->desc.h265.pps->pps_beta_offset_div2 = hevc->pps_beta_offset_div2; + context->desc.h265.pps->pps_tc_offset_div2 = hevc->pps_tc_offset_div2; + context->desc.h265.pps->lists_modification_present_flag = + hevc->slice_parsing_fields.bits.lists_modification_present_flag; + context->desc.h265.pps->log2_parallel_merge_level_minus2 = + hevc->log2_parallel_merge_level_minus2; + context->desc.h265.pps->slice_segment_header_extension_present_flag = + hevc->slice_parsing_fields.bits.slice_segment_header_extension_present_flag; + + context->desc.h265.IDRPicFlag = hevc->slice_parsing_fields.bits.IdrPicFlag; + context->desc.h265.RAPPicFlag = hevc->slice_parsing_fields.bits.RapPicFlag; + + context->desc.h265.CurrPicOrderCntVal = hevc->CurrPic.pic_order_cnt; + + for (i = 0 ; i < 8 ; i++) { + context->desc.h265.RefPicSetStCurrBefore[i] = 0xFF; + context->desc.h265.RefPicSetStCurrAfter[i] = 0xFF; + context->desc.h265.RefPicSetLtCurr[i] = 0xFF; + } + context->desc.h265.NumPocStCurrBefore = 0; + context->desc.h265.NumPocStCurrAfter = 0; + context->desc.h265.NumPocLtCurr = 0; + unsigned int iBefore = 0; + unsigned int iAfter = 0; + unsigned int iCurr = 0; + for (i = 0 ; i < 15 ; i++) { + context->desc.h265.PicOrderCntVal[i] = hevc->ReferenceFrames[i].pic_order_cnt; + + unsigned int index = hevc->ReferenceFrames[i].picture_id & 0x7F; + + if (index == 0x7F) + 
continue; + + getReferenceFrame(drv, hevc->ReferenceFrames[i].picture_id, &context->desc.h265.ref[i]); + + if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) && (iBefore < 8)) { + context->desc.h265.RefPicSetStCurrBefore[iBefore++] = i; + context->desc.h265.NumPocStCurrBefore++; + } + if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_ST_CURR_AFTER) && (iAfter < 8)) { + context->desc.h265.RefPicSetStCurrAfter[iAfter++] = i; + context->desc.h265.NumPocStCurrAfter++; + } + if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_LT_CURR) && (iCurr < 8)) { + context->desc.h265.RefPicSetLtCurr[iCurr++] = i; + context->desc.h265.NumPocLtCurr++; + } + } + break; + default: break; } @@ -297,6 +460,7 @@ handleIQMatrixBuffer(vlVaContext *context, vlVaBuffer *buf) VAIQMatrixBufferMPEG2 *mpeg2; VAIQMatrixBufferH264 *h264; VAIQMatrixBufferMPEG4 *mpeg4; + VAIQMatrixBufferHEVC *h265; switch (u_reduce_video_profile(context->decoder->profile)) { case PIPE_VIDEO_FORMAT_MPEG12: @@ -320,6 +484,17 @@ handleIQMatrixBuffer(vlVaContext *context, vlVaBuffer *buf) memcpy(&context->desc.h264.pps->ScalingList8x8, h264->ScalingList8x8, 2 * 64); break; + case PIPE_VIDEO_FORMAT_HEVC: + assert(buf->size >= sizeof(VAIQMatrixBufferH264) && buf->num_elements == 1); + h265 = buf->data; + memcpy(&context->desc.h265.pps->sps->ScalingList4x4, h265->ScalingList4x4, 6 * 16); + memcpy(&context->desc.h265.pps->sps->ScalingList8x8, h265->ScalingList8x8, 6 * 64); + memcpy(&context->desc.h265.pps->sps->ScalingList16x16, h265->ScalingList16x16, 6 * 64); + memcpy(&context->desc.h265.pps->sps->ScalingList32x32, h265->ScalingList32x32, 2 * 64); + memcpy(&context->desc.h265.pps->sps->ScalingListDCCoeff16x16, h265->ScalingListDC16x16, 6); + memcpy(&context->desc.h265.pps->sps->ScalingListDCCoeff32x32, h265->ScalingListDC32x32, 2); + break; + case PIPE_VIDEO_FORMAT_MPEG4: assert(buf->size >= sizeof(VAIQMatrixBufferMPEG4) && buf->num_elements == 1); mpeg4 = buf->data; @@ -345,6 +520,7 
@@ handleSliceParameterBuffer(vlVaContext *context, vlVaBuffer *buf) { VASliceParameterBufferH264 *h264; VASliceParameterBufferMPEG4 *mpeg4; + VASliceParameterBufferHEVC *h265; switch (u_reduce_video_profile(context->decoder->profile)) { case PIPE_VIDEO_FORMAT_MPEG4_AVC: @@ -361,6 +537,15 @@ handleSliceParameterBuffer(vlVaContext *context, vlVaBuffer *buf) context->mpeg4.quant_scale = mpeg4->quant_scale; break; + case PIPE_VIDEO_FORMAT_HEVC: + assert(buf->size >= sizeof(VASliceParameterBufferHEVC) && buf->num_elements == 1); + h265 = buf->data; + for (int i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + context->desc.h265.RefPicList[i][j] = h265->RefPicList[i][j]; + } + context->desc.h265.UseRefPicList = true; + break; default: break; } @@ -483,6 +668,7 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf) void * const *buffers[2]; unsigned sizes[2]; static const uint8_t start_code_h264[] = { 0x00, 0x00, 0x01 }; + static const uint8_t start_code_h265[] = { 0x00, 0x00, 0x01 }; static const uint8_t start_code_vc1[] = { 0x00, 0x00, 0x01, 0x0d }; format = u_reduce_video_profile(context->decoder->profile); @@ -494,6 +680,13 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf) buffers[num_buffers] = (void *const)&start_code_h264; sizes[num_buffers++] = sizeof(start_code_h264); break; + case PIPE_VIDEO_FORMAT_HEVC: + if (bufHasStartcode(buf, 0x000001, 24)) + break; + + buffers[num_buffers] = (void *const)&start_code_h265; + sizes[num_buffers++] = sizeof(start_code_h265); + break; case PIPE_VIDEO_FORMAT_VC1: if (bufHasStartcode(buf, 0x0000010d, 32) || bufHasStartcode(buf, 0x0000010c, 32) || @@ -517,15 +710,75 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf) buffers[num_buffers] = buf->data; sizes[num_buffers] = buf->size; ++num_buffers; - context->decoder->decode_bitstream(context->decoder, context->target, NULL, + context->decoder->decode_bitstream(context->decoder, context->target, &context->desc.base, 
num_buffers, (const void * const*)buffers, sizes); } +static VAStatus +handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) +{ + struct u_rect src_rect; + struct u_rect dst_rect; + struct u_rect *dirty_area; + vlVaSurface *src_surface; + VAProcPipelineParameterBuffer *pipeline_param; + struct pipe_surface **surfaces; + struct pipe_screen *screen; + struct pipe_surface *psurf; + + if (!drv || !context) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!buf || !buf->data) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (!context->target) + return VA_STATUS_ERROR_INVALID_SURFACE; + + pipeline_param = (VAProcPipelineParameterBuffer *)buf->data; + + src_surface = handle_table_get(drv->htab, pipeline_param->surface); + if (!src_surface || !src_surface->buffer) + return VA_STATUS_ERROR_INVALID_SURFACE; + + surfaces = context->target->get_surfaces(context->target); + + if (!surfaces || !surfaces[0]) + return VA_STATUS_ERROR_INVALID_SURFACE; + + screen = drv->pipe->screen; + + psurf = surfaces[0]; + + src_rect.x0 = pipeline_param->surface_region->x; + src_rect.y0 = pipeline_param->surface_region->y; + src_rect.x1 = pipeline_param->surface_region->x + pipeline_param->surface_region->width; + src_rect.y1 = pipeline_param->surface_region->y + pipeline_param->surface_region->height; + + dst_rect.x0 = pipeline_param->output_region->x; + dst_rect.y0 = pipeline_param->output_region->y; + dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width; + dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height; + + dirty_area = vl_screen_get_dirty_area(drv->vscreen); + + vl_compositor_clear_layers(&drv->cstate); + vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE); + vl_compositor_set_layer_dst_area(&drv->cstate, 0, &dst_rect); + vl_compositor_render(&drv->cstate, &drv->compositor, psurf, dirty_area, true); + + 
screen->fence_reference(screen, &src_surface->fence, NULL); + drv->pipe->flush(drv->pipe, &src_surface->fence, 0); + + return VA_STATUS_SUCCESS; +} + VAStatus vlVaRenderPicture(VADriverContextP ctx, VAContextID context_id, VABufferID *buffers, int num_buffers) { vlVaDriver *drv; vlVaContext *context; + VAStatus vaStatus = VA_STATUS_SUCCESS; unsigned i; @@ -561,13 +814,16 @@ vlVaRenderPicture(VADriverContextP ctx, VAContextID context_id, VABufferID *buff case VASliceDataBufferType: handleVASliceDataBufferType(context, buf); break; + case VAProcPipelineParameterBufferType: + vaStatus = handleVAProcPipelineParameterBufferType(drv, context, buf); + break; default: break; } } - return VA_STATUS_SUCCESS; + return vaStatus; } VAStatus @@ -587,6 +843,11 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id) if (!context) return VA_STATUS_ERROR_INVALID_CONTEXT; + if (!context->decoder) { + /* VPP */ + return VA_STATUS_SUCCESS; + } + context->mpeg4.frame_num++; context->decoder->end_frame(context->decoder, context->target, &context->desc.base); diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 8d4487bfb5a..8f406e09990 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -29,6 +29,8 @@ #include "pipe/p_screen.h" #include "pipe/p_video_codec.h" +#include "state_tracker/drm_driver.h" + #include "util/u_memory.h" #include "util/u_handle_table.h" #include "util/u_rect.h" @@ -36,64 +38,19 @@ #include "util/u_surface.h" #include "vl/vl_compositor.h" +#include "vl/vl_video_buffer.h" #include "vl/vl_winsys.h" #include "va_private.h" +#include <va/va_drmcommon.h> + VAStatus vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int format, int num_surfaces, VASurfaceID *surfaces) { - struct pipe_video_buffer templat = {}; - struct pipe_screen *pscreen; - vlVaDriver *drv; - int i; - - if (!ctx) - return VA_STATUS_ERROR_INVALID_CONTEXT; - - if (!(width && height)) - 
return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; - - drv = VL_VA_DRIVER(ctx); - pscreen = VL_VA_PSCREEN(ctx); - - templat.buffer_format = pscreen->get_video_param - ( - pscreen, - PIPE_VIDEO_PROFILE_UNKNOWN, - PIPE_VIDEO_ENTRYPOINT_BITSTREAM, - PIPE_VIDEO_CAP_PREFERED_FORMAT - ); - templat.chroma_format = ChromaToPipe(format); - templat.width = width; - templat.height = height; - templat.interlaced = pscreen->get_video_param - ( - pscreen, - PIPE_VIDEO_PROFILE_UNKNOWN, - PIPE_VIDEO_ENTRYPOINT_BITSTREAM, - PIPE_VIDEO_CAP_PREFERS_INTERLACED - ); - - for (i = 0; i < num_surfaces; ++i) { - vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface)); - if (!surf) - goto no_res; - - surf->templat = templat; - surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat); - util_dynarray_init(&surf->subpics); - surfaces[i] = handle_table_add(drv->htab, surf); - } - - return VA_STATUS_SUCCESS; - -no_res: - if (i) - vlVaDestroySurfaces(ctx, surfaces, i); - - return VA_STATUS_ERROR_ALLOCATION_FAILED; + return vlVaCreateSurfaces2(ctx, format, width, height, surfaces, num_surfaces, + NULL, 0); } VAStatus @@ -349,3 +306,427 @@ vlVaUnlockSurface(VADriverContextP ctx, VASurfaceID surface) return VA_STATUS_ERROR_UNIMPLEMENTED; } + +VAStatus +vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config, + VASurfaceAttrib *attrib_list, unsigned int *num_attribs) +{ + vlVaDriver *drv; + VASurfaceAttrib *attribs; + struct pipe_screen *pscreen; + int i; + + if (config == VA_INVALID_ID) + return VA_STATUS_ERROR_INVALID_CONFIG; + + if (!attrib_list && !num_attribs) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (!attrib_list) { + *num_attribs = VASurfaceAttribCount; + return VA_STATUS_SUCCESS; + } + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + pscreen = VL_VA_PSCREEN(ctx); + + if (!pscreen) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + attribs = CALLOC(VASurfaceAttribCount, 
sizeof(VASurfaceAttrib)); + + if (!attribs) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + i = 0; + + if (config == PIPE_VIDEO_PROFILE_UNKNOWN) { + /* vlVaCreateConfig returns PIPE_VIDEO_PROFILE_UNKNOWN + only for VAEntrypointVideoProc. */ + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRA; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_RGBA; + i++; + } else { + /* Assume VAEntrypointVLD for now. */ + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_NV12; + i++; + } + + attribs[i].type = VASurfaceAttribMemoryType; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA | + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; + i++; + + attribs[i].type = VASurfaceAttribExternalBufferDescriptor; + attribs[i].value.type = VAGenericValueTypePointer; + attribs[i].flags = VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.p = NULL; /* ignore */ + i++; + + attribs[i].type = VASurfaceAttribMaxWidth; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE; + attribs[i].value.value.i = vl_video_buffer_max_size(pscreen); + i++; + + attribs[i].type = VASurfaceAttribMaxHeight; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE; + attribs[i].value.value.i = vl_video_buffer_max_size(pscreen); + i++; + + if (i > *num_attribs) { + *num_attribs = i; + 
FREE(attribs); + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + } + + *num_attribs = i; + memcpy(attrib_list, attribs, i * sizeof(VASurfaceAttrib)); + FREE(attribs); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +suface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface, + VASurfaceAttribExternalBuffers *memory_attibute, + int index, VASurfaceID *surfaces, + struct pipe_video_buffer *templat) +{ + vlVaDriver *drv; + struct pipe_screen *pscreen; + struct pipe_resource *resource; + struct pipe_resource res_templ; + struct winsys_handle whandle; + struct pipe_resource *resources[VL_NUM_COMPONENTS]; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + pscreen = VL_VA_PSCREEN(ctx); + drv = VL_VA_DRIVER(ctx); + + if (!memory_attibute || !memory_attibute->buffers || + index > memory_attibute->num_buffers) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (surface->templat.width != memory_attibute->width || + surface->templat.height != memory_attibute->height || + memory_attibute->num_planes < 1) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + switch (memory_attibute->pixel_format) { + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + if (memory_attibute->num_planes != 1) + return VA_STATUS_ERROR_INVALID_PARAMETER; + break; + default: + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + memset(&res_templ, 0, sizeof(res_templ)); + res_templ.target = PIPE_TEXTURE_2D; + res_templ.last_level = 0; + res_templ.depth0 = 1; + res_templ.array_size = 1; + res_templ.width0 = memory_attibute->width; + res_templ.height0 = memory_attibute->height; + res_templ.format = surface->templat.buffer_format; + res_templ.bind = PIPE_BIND_SAMPLER_VIEW; + res_templ.usage = PIPE_USAGE_DEFAULT; + + memset(&whandle, 0, sizeof(struct winsys_handle)); + whandle.type = DRM_API_HANDLE_TYPE_FD; + whandle.handle = memory_attibute->buffers[index]; + whandle.stride = memory_attibute->pitches[index]; + + resource = 
pscreen->resource_from_handle(pscreen, &res_templ, &whandle); + + if (!resource) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + memset(resources, 0, sizeof resources); + resources[0] = resource; + + surface->buffer = vl_video_buffer_create_ex2(drv->pipe, templat, resources); + if (!surface->buffer) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + util_dynarray_init(&surface->subpics); + surfaces[index] = handle_table_add(drv->htab, surface); + + if (!surfaces[index]) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + return VA_STATUS_SUCCESS; +} + +VAStatus +vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format, + unsigned int width, unsigned int height, + VASurfaceID *surfaces, unsigned int num_surfaces, + VASurfaceAttrib *attrib_list, unsigned int num_attribs) +{ + vlVaDriver *drv; + VASurfaceAttribExternalBuffers *memory_attibute; + struct pipe_video_buffer templat; + struct pipe_screen *pscreen; + int i; + int memory_type; + int expected_fourcc; + VAStatus vaStatus; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!(width && height)) + return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; + + drv = VL_VA_DRIVER(ctx); + + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + pscreen = VL_VA_PSCREEN(ctx); + + if (!pscreen) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + /* Default. 
*/ + memory_attibute = NULL; + memory_type = VA_SURFACE_ATTRIB_MEM_TYPE_VA; + expected_fourcc = 0; + + for (i = 0; i < num_attribs && attrib_list; i++) { + if ((attrib_list[i].type == VASurfaceAttribPixelFormat) && + (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { + if (attrib_list[i].value.type != VAGenericValueTypeInteger) + return VA_STATUS_ERROR_INVALID_PARAMETER; + expected_fourcc = attrib_list[i].value.value.i; + } + + if ((attrib_list[i].type == VASurfaceAttribMemoryType) && + (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { + + if (attrib_list[i].value.type != VAGenericValueTypeInteger) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + switch (attrib_list[i].value.value.i) { + case VA_SURFACE_ATTRIB_MEM_TYPE_VA: + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: + memory_type = attrib_list[i].value.value.i; + break; + default: + return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; + } + } + + if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) && + (attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) { + if (attrib_list[i].value.type != VAGenericValueTypePointer) + return VA_STATUS_ERROR_INVALID_PARAMETER; + memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p; + } + } + + if (VA_RT_FORMAT_YUV420 != format && + VA_RT_FORMAT_YUV422 != format && + VA_RT_FORMAT_YUV444 != format && + VA_RT_FORMAT_RGB32 != format) { + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + } + + switch (memory_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_VA: + break; + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: + if (!memory_attibute) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + expected_fourcc = memory_attibute->pixel_format; + break; + default: + assert(0); + } + + memset(&templat, 0, sizeof(templat)); + + if (expected_fourcc) { + templat.buffer_format = VaFourccToPipeFormat(expected_fourcc); + templat.interlaced = 0; + } else { + templat.buffer_format = pscreen->get_video_param + ( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + 
PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERED_FORMAT + ); + templat.interlaced = pscreen->get_video_param + ( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERS_INTERLACED + ); + } + + templat.chroma_format = ChromaToPipe(format); + + templat.width = width; + templat.height = height; + + memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID)); + + for (i = 0; i < num_surfaces; i++) { + vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface)); + if (!surf) + goto no_res; + + surf->templat = templat; + + switch (memory_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_VA: + surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat); + if (!surf->buffer) + goto no_res; + util_dynarray_init(&surf->subpics); + surfaces[i] = handle_table_add(drv->htab, surf); + break; + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: + vaStatus = suface_from_external_memory(ctx, surf, memory_attibute, i, surfaces, &templat); + if (vaStatus != VA_STATUS_SUCCESS) + goto no_res; + break; + default: + assert(0); + } + } + + return VA_STATUS_SUCCESS; + +no_res: + if (i) + vlVaDestroySurfaces(ctx, surfaces, i); + + return VA_STATUS_ERROR_ALLOCATION_FAILED; +} + +VAStatus +vlVaQueryVideoProcFilters(VADriverContextP ctx, VAContextID context, + VAProcFilterType *filters, unsigned int *num_filters) +{ + unsigned int num = 0; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!num_filters || !filters) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + filters[num++] = VAProcFilterNone; + + *num_filters = num; + + return VA_STATUS_SUCCESS; +} + +VAStatus +vlVaQueryVideoProcFilterCaps(VADriverContextP ctx, VAContextID context, + VAProcFilterType type, void *filter_caps, + unsigned int *num_filter_caps) +{ + unsigned int i; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!filter_caps || !num_filter_caps) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + i = 0; + + switch (type) { + case VAProcFilterNone: + break; + 
case VAProcFilterNoiseReduction: + case VAProcFilterDeinterlacing: + case VAProcFilterSharpening: + case VAProcFilterColorBalance: + case VAProcFilterSkinToneEnhancement: + return VA_STATUS_ERROR_UNIMPLEMENTED; + default: + assert(0); + } + + *num_filter_caps = i; + + return VA_STATUS_SUCCESS; +} + +static VAProcColorStandardType vpp_input_color_standards[VAProcColorStandardCount] = { + VAProcColorStandardBT601 +}; + +static VAProcColorStandardType vpp_output_color_standards[VAProcColorStandardCount] = { + VAProcColorStandardBT601 +}; + +VAStatus +vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context, + VABufferID *filters, unsigned int num_filters, + VAProcPipelineCaps *pipeline_cap) +{ + unsigned int i = 0; + + if (!ctx) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + if (!pipeline_cap) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (num_filters && !filters) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + pipeline_cap->pipeline_flags = 0; + pipeline_cap->filter_flags = 0; + pipeline_cap->num_forward_references = 0; + pipeline_cap->num_backward_references = 0; + pipeline_cap->num_input_color_standards = 1; + pipeline_cap->input_color_standards = vpp_input_color_standards; + pipeline_cap->num_output_color_standards = 1; + pipeline_cap->output_color_standards = vpp_output_color_standards; + + for (i = 0; i < num_filters; i++) { + vlVaBuffer *buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, filters[i]); + + if (!buf || buf->type >= VABufferTypeMax) + return VA_STATUS_ERROR_INVALID_BUFFER; + } + + return VA_STATUS_SUCCESS; +} diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h index 1ea7be79aa3..2b645d08a03 100644 --- a/src/gallium/state_trackers/va/va_private.h +++ b/src/gallium/state_trackers/va/va_private.h @@ -33,6 +33,8 @@ #include <va/va.h> #include <va/va_backend.h> +#include <va/va_backend_vpp.h> +#include <va/va_drmcommon.h> #include "pipe/p_video_enums.h" #include "pipe/p_video_codec.h" @@ 
-46,7 +48,7 @@ #define VL_VA_DRIVER(ctx) ((vlVaDriver *)ctx->pDriverData) #define VL_VA_PSCREEN(ctx) (VL_VA_DRIVER(ctx)->vscreen->pscreen) -#define VL_VA_MAX_IMAGE_FORMATS 6 +#define VL_VA_MAX_IMAGE_FORMATS 9 static inline enum pipe_video_chroma_format ChromaToPipe(int format) @@ -59,13 +61,12 @@ ChromaToPipe(int format) case VA_RT_FORMAT_YUV444: return PIPE_VIDEO_CHROMA_FORMAT_444; default: - assert(0); - return PIPE_VIDEO_CHROMA_FORMAT_420; + return PIPE_VIDEO_CHROMA_FORMAT_NONE; } } static inline enum pipe_format -YCbCrToPipe(unsigned format) +VaFourccToPipeFormat(unsigned format) { switch(format) { case VA_FOURCC('N','V','1','2'): @@ -80,12 +81,46 @@ YCbCrToPipe(unsigned format) return PIPE_FORMAT_UYVY; case VA_FOURCC('B','G','R','A'): return PIPE_FORMAT_B8G8R8A8_UNORM; + case VA_FOURCC('R','G','B','A'): + return PIPE_FORMAT_R8G8B8A8_UNORM; + case VA_FOURCC('B','G','R','X'): + return PIPE_FORMAT_B8G8R8X8_UNORM; + case VA_FOURCC('R','G','B','X'): + return PIPE_FORMAT_R8G8B8X8_UNORM; default: assert(0); return PIPE_FORMAT_NONE; } } +static inline unsigned +PipeFormatToVaFourcc(enum pipe_format p_format) +{ + switch (p_format) { + case PIPE_FORMAT_NV12: + return VA_FOURCC('N','V','1','2'); + case PIPE_FORMAT_IYUV: + return VA_FOURCC('I','4','2','0'); + case PIPE_FORMAT_YV12: + return VA_FOURCC('Y','V','1','2'); + case PIPE_FORMAT_UYVY: + return VA_FOURCC('U','Y','V','Y'); + case PIPE_FORMAT_YUYV: + return VA_FOURCC('Y','U','Y','V'); + case PIPE_FORMAT_B8G8R8A8_UNORM: + return VA_FOURCC('B','G','R','A'); + case PIPE_FORMAT_R8G8B8A8_UNORM: + return VA_FOURCC('R','G','B','A'); + case PIPE_FORMAT_B8G8R8X8_UNORM: + return VA_FOURCC('B','G','R','X'); + case PIPE_FORMAT_R8G8B8X8_UNORM: + return VA_FOURCC('R','G','B','X'); + default: + assert(0); + return -1; + } +} + static inline VAProfile PipeToProfile(enum pipe_video_profile profile) { @@ -110,8 +145,11 @@ PipeToProfile(enum pipe_video_profile profile) return VAProfileH264Main; case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 
return VAProfileH264High; + case PIPE_VIDEO_PROFILE_HEVC_MAIN: + return VAProfileHEVCMain; case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: - return VAProfileNone; + case PIPE_VIDEO_PROFILE_UNKNOWN: + return VAProfileNone; default: assert(0); return -1; @@ -142,6 +180,10 @@ ProfileToPipe(VAProfile profile) return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN; case VAProfileH264High: return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + case VAProfileHEVCMain: + return PIPE_VIDEO_PROFILE_HEVC_MAIN; + case VAProfileNone: + return PIPE_VIDEO_PROFILE_UNKNOWN; default: return PIPE_VIDEO_PROFILE_UNKNOWN; } @@ -174,6 +216,7 @@ typedef struct { struct pipe_mpeg4_picture_desc mpeg4; struct pipe_vc1_picture_desc vc1; struct pipe_h264_picture_desc h264; + struct pipe_h265_picture_desc h265; } desc; struct { @@ -191,6 +234,13 @@ typedef struct { unsigned int size; unsigned int num_elements; void *data; + struct { + struct pipe_resource *resource; + struct pipe_transfer *transfer; + struct pipe_fence_handle *fence; + } derived_surface; + unsigned int export_refcount; + VABufferInfo export_state; } vlVaBuffer; typedef struct { @@ -275,5 +325,19 @@ VAStatus vlVaLockSurface(VADriverContextP ctx, VASurfaceID surface, unsigned int unsigned int *luma_offset, unsigned int *chroma_u_offset, unsigned int *chroma_v_offset, unsigned int *buffer_name, void **buffer); VAStatus vlVaUnlockSurface(VADriverContextP ctx, VASurfaceID surface); +VAStatus vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format, unsigned int width, unsigned int height, + VASurfaceID *surfaces, unsigned int num_surfaces, VASurfaceAttrib *attrib_list, + unsigned int num_attribs); +VAStatus vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config, VASurfaceAttrib *attrib_list, + unsigned int *num_attribs); + +VAStatus vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id, VABufferInfo *out_buf_info); +VAStatus vlVaReleaseBufferHandle(VADriverContextP ctx, VABufferID buf_id); +VAStatus 
vlVaQueryVideoProcFilters(VADriverContextP ctx, VAContextID context, VAProcFilterType *filters, + unsigned int *num_filters); +VAStatus vlVaQueryVideoProcFilterCaps(VADriverContextP ctx, VAContextID context, VAProcFilterType type, + void *filter_caps, unsigned int *num_filter_caps); +VAStatus vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context, VABufferID *filters, + unsigned int num_filters, VAProcPipelineCaps *pipeline_cap); #endif //VA_PRIVATE_H diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index 3233799d650..f85bce823bb 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -518,6 +518,7 @@ vlVdpDecoderRenderH265(struct pipe_h265_picture_desc *picture, memcpy(picture->RefPicSetStCurrBefore, picture_info->RefPicSetStCurrBefore, 8); memcpy(picture->RefPicSetStCurrAfter, picture_info->RefPicSetStCurrAfter, 8); memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8); + picture->UseRefPicList = false; return VDP_STATUS_OK; } diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index 7f945d14b5c..95efdd4451c 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -83,6 +83,8 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc +include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc + include $(top_srcdir)/src/gallium/drivers/softpipe/Automake.inc include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h index 8882c418e12..90fe0cd50f1 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h @@ -151,11 +151,15 @@ enum { /* CZ specific rev IDs */ enum { - CZ_CARRIZO_A0 = 0x01, + CARRIZO_A0 = 0x01, + STONEY_A0 = 0x61, CZ_UNKNOWN = 0xFF }; 
#define ASICREV_IS_CARRIZO(eChipRev) \ - (eChipRev >= CARRIZO_A0) + ((eChipRev >= CARRIZO_A0) && (eChipRev < STONEY_A0)) + +#define ASICREV_IS_STONEY(eChipRev) \ + ((eChipRev >= STONEY_A0) && (eChipRev < CZ_UNKNOWN)) #endif /* AMDGPU_ID_H */ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index 358df381011..3006bd17958 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -175,7 +175,9 @@ static int compute_level(struct amdgpu_winsys *ws, struct radeon_surf *surf, bool is_stencil, unsigned level, unsigned type, bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut) + ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, + ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, + ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) { struct radeon_surf_level *surf_level; ADDR_E_RETURNCODE ret; @@ -248,6 +250,31 @@ static int compute_level(struct amdgpu_winsys *ws, surf->tiling_index[level] = AddrSurfInfoOut->tileIndex; surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize; + + if (AddrSurfInfoIn->flags.dccCompatible) { + AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; + AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; + AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; + AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; + AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; + + ret = AddrComputeDccInfo(ws->addrlib, + AddrDccIn, + AddrDccOut); + + if (ret == ADDR_OK) { + surf_level->dcc_offset = surf->dcc_size; + surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; + surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); + } else { + surf->dcc_size = 0; + surf_level->dcc_offset = 0; + } + } else { + surf->dcc_size = 0; + surf_level->dcc_offset = 0; + } + return 0; } @@ -259,6 +286,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, bool 
compressed; ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; + ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; + ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; ADDR_TILEINFO AddrTileInfoIn = {0}; ADDR_TILEINFO AddrTileInfoOut = {0}; int r; @@ -269,6 +298,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); + AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); + AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; type = RADEON_SURF_GET(surf->flags, TYPE); @@ -318,10 +349,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, } } else { - AddrSurfInfoIn.bpp = surf->bpe * 8; + AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; } - AddrSurfInfoIn.numSamples = surf->nsamples; + AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples; AddrSurfInfoIn.tileIndex = -1; /* Set the micro tile type. */ @@ -339,6 +370,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; AddrSurfInfoIn.flags.degrade4Space = 1; + AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + !(surf->flags & RADEON_SURF_SCANOUT) && + !compressed && AddrDccIn.numSamples <= 1; /* This disables incorrect calculations (hacks) in addrlib. */ AddrSurfInfoIn.flags.noStencil = 1; @@ -375,11 +409,13 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, } surf->bo_size = 0; + surf->dcc_size = 0; + surf->dcc_alignment = 1; /* Calculate texture layout information. 
*/ for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, false, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); if (r) return r; @@ -406,7 +442,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, true, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); if (r) return r; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index c8772490e74..32cd9d9aa50 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -226,7 +226,11 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws) break; case CHIP_CARRIZO: ws->family = FAMILY_CZ; - ws->rev_id = CZ_CARRIZO_A0; + ws->rev_id = CARRIZO_A0; + break; + case CHIP_STONEY: + ws->family = FAMILY_CZ; + ws->rev_id = STONEY_A0; break; case CHIP_FIJI: ws->family = FAMILY_VI; diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c index 1675af4cbc8..4dc32366d61 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.c +++ b/src/gallium/winsys/svga/drm/vmw_context.c @@ -251,6 +251,7 @@ vmw_swc_flush(struct svga_winsys_context *swc, vswc->must_flush = FALSE; debug_flush_flush(vswc->fctx); #endif + swc->hints &= ~SVGA_HINT_FLAG_DRAW_EMITTED; vswc->preemptive_flush = FALSE; vswc->seen_surfaces = 0; vswc->seen_regions = 0; @@ -372,7 +373,8 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) { vswc->seen_regions += reloc->buffer->size; - if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/5) + if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) && + vswc->seen_regions >= VMW_GMR_POOL_SIZE/5) vswc->preemptive_flush = TRUE; } @@ -413,8 +415,10 @@ 
vmw_swc_mob_relocation(struct svga_winsys_context *swc, if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) { vswc->seen_mobs += pb_buffer->size; - /* divide by 5, tested for best performance */ - if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR) + + if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) && + vswc->seen_mobs >= + vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR) vswc->preemptive_flush = TRUE; } @@ -475,8 +479,9 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc, ++vswc->surface.staged; vswc->seen_surfaces += vsurf->size; - /* divide by 5 not well tuned for performance */ - if (vswc->seen_surfaces >= vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR) + if ((swc->hints & SVGA_HINT_FLAG_DRAW_EMITTED) && + vswc->seen_surfaces >= + vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR) vswc->preemptive_flush = TRUE; } diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c index 8451d832806..5c98f2603c7 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c @@ -44,8 +44,10 @@ struct dri_sw_displaytarget unsigned height; unsigned stride; + unsigned map_flags; void *data; void *mapped; + const void *front_private; }; struct dri_sw_winsys @@ -83,6 +85,7 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { struct dri_sw_displaytarget *dri_sw_dt; @@ -95,6 +98,7 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys, dri_sw_dt->format = format; dri_sw_dt->width = width; dri_sw_dt->height = height; + dri_sw_dt->front_private = front_private; format_stride = util_format_get_stride(format, width); dri_sw_dt->stride = align(format_stride, alignment); @@ -133,6 +137,12 @@ dri_sw_displaytarget_map(struct sw_winsys *ws, { struct dri_sw_displaytarget *dri_sw_dt = 
dri_sw_displaytarget(dt); dri_sw_dt->mapped = dri_sw_dt->data; + + if (dri_sw_dt->front_private && (flags & PIPE_TRANSFER_READ)) { + struct dri_sw_winsys *dri_sw_ws = dri_sw_winsys(ws); + dri_sw_ws->lf->get_image((void *)dri_sw_dt->front_private, 0, 0, dri_sw_dt->width, dri_sw_dt->height, dri_sw_dt->stride, dri_sw_dt->data); + } + dri_sw_dt->map_flags = flags; return dri_sw_dt->mapped; } @@ -141,6 +151,11 @@ dri_sw_displaytarget_unmap(struct sw_winsys *ws, struct sw_displaytarget *dt) { struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt); + if (dri_sw_dt->front_private && (dri_sw_dt->map_flags & PIPE_TRANSFER_WRITE)) { + struct dri_sw_winsys *dri_sw_ws = dri_sw_winsys(ws); + dri_sw_ws->lf->put_image2((void *)dri_sw_dt->front_private, dri_sw_dt->data, 0, 0, dri_sw_dt->width, dri_sw_dt->height, dri_sw_dt->stride); + } + dri_sw_dt->map_flags = 0; dri_sw_dt->mapped = NULL; } diff --git a/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c b/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c index dc725f4b90c..16f641833c6 100644 --- a/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c +++ b/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c @@ -124,6 +124,7 @@ gdi_sw_displaytarget_create(struct sw_winsys *winsys, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { struct gdi_sw_displaytarget *gdt; diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c index 900c49f83e6..1e859717f1c 100644 --- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c +++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c @@ -111,6 +111,7 @@ kms_sw_displaytarget_create(struct sw_winsys *ws, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { struct kms_sw_winsys *kms_sw = kms_sw_winsys(ws); diff --git a/src/gallium/winsys/sw/null/null_sw_winsys.c b/src/gallium/winsys/sw/null/null_sw_winsys.c index 
9c8b3ec4396..10ce2508507 100644 --- a/src/gallium/winsys/sw/null/null_sw_winsys.c +++ b/src/gallium/winsys/sw/null/null_sw_winsys.c @@ -84,6 +84,7 @@ null_sw_displaytarget_create(struct sw_winsys *winsys, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { fprintf(stderr, "null_sw_displaytarget_create() returning NULL\n"); diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c index 5c179930d9b..4d87a580cb1 100644 --- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c +++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c @@ -148,6 +148,7 @@ wsw_dt_create(struct sw_winsys *ws, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); diff --git a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c index 515ecd9f7b7..cc2a3de9dd3 100644 --- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c +++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c @@ -391,6 +391,7 @@ xlib_displaytarget_create(struct sw_winsys *winsys, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, + const void *front_private, unsigned *stride) { struct xlib_displaytarget *xlib_dt; diff --git a/src/gallium/winsys/virgl/drm/Makefile.am b/src/gallium/winsys/virgl/drm/Makefile.am new file mode 100644 index 00000000000..2473b88ad6e --- /dev/null +++ b/src/gallium/winsys/virgl/drm/Makefile.am @@ -0,0 +1,33 @@ +# Copyright © 2015 Red Hat Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_WINSYS_CFLAGS) \ + $(LIBDRM_CFLAGS) + +noinst_LTLIBRARIES = libvirgldrm.la + +libvirgldrm_la_SOURCES = $(C_SOURCES) diff --git a/src/gallium/winsys/virgl/drm/Makefile.sources b/src/gallium/winsys/virgl/drm/Makefile.sources new file mode 100644 index 00000000000..0430d4ada4a --- /dev/null +++ b/src/gallium/winsys/virgl/drm/Makefile.sources @@ -0,0 +1,5 @@ +C_SOURCES := \ + virgl_drm_public.h \ + virgl_drm_winsys.c \ + virgl_drm_winsys.h \ + virtgpu_drm.h diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_public.h b/src/gallium/winsys/virgl/drm/virgl_drm_public.h new file mode 100644 index 00000000000..be01021ca9a --- /dev/null +++ b/src/gallium/winsys/virgl/drm/virgl_drm_public.h @@ -0,0 +1,30 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_DRM_PUBLIC_H +#define VIRGL_DRM_PUBLIC_H + +struct virgl_winsys; + +struct virgl_winsys *virgl_drm_winsys_create(int drmFD); + +#endif diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c new file mode 100644 index 00000000000..d77ebd6ca15 --- /dev/null +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c @@ -0,0 +1,774 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/ioctl.h> + +#include "os/os_mman.h" +#include "os/os_time.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/u_inlines.h" +#include "state_tracker/drm_driver.h" + +#include <xf86drm.h> +#include "virtgpu_drm.h" + +#include "virgl_drm_winsys.h" +#include "virgl_drm_public.h" + +static inline boolean can_cache_resource(struct virgl_hw_res *res) +{ + return res->cacheable == TRUE; +} + +static void virgl_hw_res_destroy(struct virgl_drm_winsys *qdws, + struct virgl_hw_res *res) +{ + struct drm_gem_close args; + + if (res->name) { + pipe_mutex_lock(qdws->bo_handles_mutex); + util_hash_table_remove(qdws->bo_handles, + (void *)(uintptr_t)res->name); + pipe_mutex_unlock(qdws->bo_handles_mutex); + } + + if (res->ptr) + os_munmap(res->ptr, res->size); + + memset(&args, 0, sizeof(args)); + args.handle = res->bo_handle; + drmIoctl(qdws->fd, DRM_IOCTL_GEM_CLOSE, &args); + FREE(res); +} + +static boolean virgl_drm_resource_is_busy(struct virgl_drm_winsys *qdws, + struct virgl_hw_res *res) +{ + struct drm_virtgpu_3d_wait waitcmd; + int ret; + + memset(&waitcmd, 0, sizeof(waitcmd)); + waitcmd.handle = res->bo_handle; + waitcmd.flags = VIRTGPU_WAIT_NOWAIT; + + ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_WAIT, &waitcmd); + if (ret && errno == EBUSY) + return TRUE; + return FALSE; +} + +static void +virgl_cache_flush(struct virgl_drm_winsys *qdws) +{ + struct list_head *curr, *next; + struct virgl_hw_res *res; + + pipe_mutex_lock(qdws->mutex); + curr = qdws->delayed.next; + next = curr->next; + + while (curr != &qdws->delayed) { + res = LIST_ENTRY(struct virgl_hw_res, curr, head); + LIST_DEL(&res->head); + virgl_hw_res_destroy(qdws, res); + curr = next; + next = curr->next; + } + pipe_mutex_unlock(qdws->mutex); +} +static void +virgl_drm_winsys_destroy(struct virgl_winsys *qws) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + + 
virgl_cache_flush(qdws); + + util_hash_table_destroy(qdws->bo_handles); + pipe_mutex_destroy(qdws->bo_handles_mutex); + pipe_mutex_destroy(qdws->mutex); + + FREE(qdws); +} + +static void +virgl_cache_list_check_free(struct virgl_drm_winsys *qdws) +{ + struct list_head *curr, *next; + struct virgl_hw_res *res; + int64_t now; + + now = os_time_get(); + curr = qdws->delayed.next; + next = curr->next; + while (curr != &qdws->delayed) { + res = LIST_ENTRY(struct virgl_hw_res, curr, head); + if (!os_time_timeout(res->start, res->end, now)) + break; + + LIST_DEL(&res->head); + virgl_hw_res_destroy(qdws, res); + curr = next; + next = curr->next; + } +} + +static void virgl_drm_resource_reference(struct virgl_drm_winsys *qdws, + struct virgl_hw_res **dres, + struct virgl_hw_res *sres) +{ + struct virgl_hw_res *old = *dres; + if (pipe_reference(&(*dres)->reference, &sres->reference)) { + + if (!can_cache_resource(old)) { + virgl_hw_res_destroy(qdws, old); + } else { + pipe_mutex_lock(qdws->mutex); + virgl_cache_list_check_free(qdws); + + old->start = os_time_get(); + old->end = old->start + qdws->usecs; + LIST_ADDTAIL(&old->head, &qdws->delayed); + qdws->num_delayed++; + pipe_mutex_unlock(qdws->mutex); + } + } + *dres = sres; +} + +static struct virgl_hw_res * +virgl_drm_winsys_resource_create(struct virgl_winsys *qws, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples, + uint32_t size) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct drm_virtgpu_resource_create createcmd; + int ret; + struct virgl_hw_res *res; + uint32_t stride = width * util_format_get_blocksize(format); + + res = CALLOC_STRUCT(virgl_hw_res); + if (!res) + return NULL; + + memset(&createcmd, 0, sizeof(createcmd)); + createcmd.target = target; + createcmd.format = format; + createcmd.bind = bind; + createcmd.width = width; + createcmd.height = 
height; + createcmd.depth = depth; + createcmd.array_size = array_size; + createcmd.last_level = last_level; + createcmd.nr_samples = nr_samples; + createcmd.stride = stride; + createcmd.size = size; + + ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE, &createcmd); + if (ret != 0) { + FREE(res); + return NULL; + } + + res->bind = bind; + res->format = format; + + res->res_handle = createcmd.res_handle; + res->bo_handle = createcmd.bo_handle; + res->size = size; + res->stride = stride; + pipe_reference_init(&res->reference, 1); + res->num_cs_references = 0; + return res; +} + +static inline int virgl_is_res_compat(struct virgl_drm_winsys *qdws, + struct virgl_hw_res *res, + uint32_t size, uint32_t bind, + uint32_t format) +{ + if (res->bind != bind) + return 0; + if (res->format != format) + return 0; + if (res->size < size) + return 0; + if (res->size > size * 2) + return 0; + + if (virgl_drm_resource_is_busy(qdws, res)) { + return -1; + } + + return 1; +} + +static int +virgl_bo_transfer_put(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level) +{ + struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws); + struct drm_virtgpu_3d_transfer_to_host tohostcmd; + + memset(&tohostcmd, 0, sizeof(tohostcmd)); + tohostcmd.bo_handle = res->bo_handle; + tohostcmd.box = *(struct drm_virtgpu_3d_box *)box; + tohostcmd.offset = buf_offset; + tohostcmd.level = level; + // tohostcmd.stride = stride; + // tohostcmd.layer_stride = stride; + return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST, &tohostcmd); +} + +static int +virgl_bo_transfer_get(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level) +{ + struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws); + struct drm_virtgpu_3d_transfer_from_host fromhostcmd; + + memset(&fromhostcmd, 0, 
sizeof(fromhostcmd)); + fromhostcmd.bo_handle = res->bo_handle; + fromhostcmd.level = level; + fromhostcmd.offset = buf_offset; + // fromhostcmd.stride = stride; + // fromhostcmd.layer_stride = layer_stride; + fromhostcmd.box = *(struct drm_virtgpu_3d_box *)box; + return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST, &fromhostcmd); +} + +static struct virgl_hw_res * +virgl_drm_winsys_resource_cache_create(struct virgl_winsys *qws, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples, + uint32_t size) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct virgl_hw_res *res, *curr_res; + struct list_head *curr, *next; + int64_t now; + int ret; + + /* only store binds for vertex/index/const buffers */ + if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER && + bind != VIRGL_BIND_VERTEX_BUFFER && bind != VIRGL_BIND_CUSTOM) + goto alloc; + + pipe_mutex_lock(qdws->mutex); + + res = NULL; + curr = qdws->delayed.next; + next = curr->next; + + now = os_time_get(); + while (curr != &qdws->delayed) { + curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head); + + if (!res && (ret = virgl_is_res_compat(qdws, curr_res, size, bind, format) > 0)) + res = curr_res; + else if (os_time_timeout(curr_res->start, curr_res->end, now)) { + LIST_DEL(&curr_res->head); + virgl_hw_res_destroy(qdws, curr_res); + } else + break; + + if (ret == -1) + break; + + curr = next; + next = curr->next; + } + + if (!res && ret != -1) { + while (curr != &qdws->delayed) { + curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head); + ret = virgl_is_res_compat(qdws, curr_res, size, bind, format); + if (ret > 0) { + res = curr_res; + break; + } + if (ret == -1) + break; + curr = next; + next = curr->next; + } + } + + if (res) { + LIST_DEL(&res->head); + --qdws->num_delayed; + pipe_mutex_unlock(qdws->mutex); + 
pipe_reference_init(&res->reference, 1); + return res; + } + + pipe_mutex_unlock(qdws->mutex); + +alloc: + res = virgl_drm_winsys_resource_create(qws, target, format, bind, + width, height, depth, array_size, + last_level, nr_samples, size); + if (bind == VIRGL_BIND_CONSTANT_BUFFER || bind == VIRGL_BIND_INDEX_BUFFER || + bind == VIRGL_BIND_VERTEX_BUFFER) + res->cacheable = TRUE; + return res; +} + +static struct virgl_hw_res * +virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, + struct winsys_handle *whandle) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct drm_gem_open open_arg = {}; + struct drm_virtgpu_resource_info info_arg = {}; + struct virgl_hw_res *res; + + pipe_mutex_lock(qdws->bo_handles_mutex); + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle); + if (res) { + struct virgl_hw_res *r = NULL; + virgl_drm_resource_reference(qdws, &r, res); + goto done; + } + } + + res = CALLOC_STRUCT(virgl_hw_res); + if (!res) + goto done; + + if (whandle->type == DRM_API_HANDLE_TYPE_FD) { + int r; + uint32_t handle; + r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle); + if (r) { + FREE(res); + res = NULL; + goto done; + } + res->bo_handle = handle; + } else { + memset(&open_arg, 0, sizeof(open_arg)); + open_arg.name = whandle->handle; + if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { + FREE(res); + res = NULL; + goto done; + } + res->bo_handle = open_arg.handle; + } + res->name = whandle->handle; + + memset(&info_arg, 0, sizeof(info_arg)); + info_arg.bo_handle = res->bo_handle; + + if (drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, &info_arg)) { + /* close */ + FREE(res); + res = NULL; + goto done; + } + + res->res_handle = info_arg.res_handle; + + res->size = info_arg.size; + res->stride = info_arg.stride; + pipe_reference_init(&res->reference, 1); + res->num_cs_references = 0; + + util_hash_table_set(qdws->bo_handles, (void 
*)(uintptr_t)whandle->handle, res); + +done: + pipe_mutex_unlock(qdws->bo_handles_mutex); + return res; +} + +static boolean virgl_drm_winsys_resource_get_handle(struct virgl_winsys *qws, + struct virgl_hw_res *res, + uint32_t stride, + struct winsys_handle *whandle) + { + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct drm_gem_flink flink; + + if (!res) + return FALSE; + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!res->flinked) { + memset(&flink, 0, sizeof(flink)); + flink.handle = res->bo_handle; + + if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { + return FALSE; + } + res->flinked = TRUE; + res->flink = flink.name; + + pipe_mutex_lock(qdws->bo_handles_mutex); + util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res); + pipe_mutex_unlock(qdws->bo_handles_mutex); + } + whandle->handle = res->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = res->bo_handle; + } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { + if (drmPrimeHandleToFD(qdws->fd, res->bo_handle, DRM_CLOEXEC, (int*)&whandle->handle)) + return FALSE; + } + whandle->stride = stride; + return TRUE; +} + +static void virgl_drm_winsys_resource_unref(struct virgl_winsys *qws, + struct virgl_hw_res *hres) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + + virgl_drm_resource_reference(qdws, &hres, NULL); +} + +static void *virgl_drm_resource_map(struct virgl_winsys *qws, + struct virgl_hw_res *res) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct drm_virtgpu_map mmap_arg; + void *ptr; + + if (res->ptr) + return res->ptr; + + memset(&mmap_arg, 0, sizeof(mmap_arg)); + mmap_arg.handle = res->bo_handle; + if (drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_MAP, &mmap_arg)) + return NULL; + + ptr = os_mmap(0, res->size, PROT_READ|PROT_WRITE, MAP_SHARED, + qdws->fd, mmap_arg.offset); + if (ptr == MAP_FAILED) + return NULL; + + res->ptr = ptr; + return ptr; + +} + +static void 
virgl_drm_resource_wait(struct virgl_winsys *qws, + struct virgl_hw_res *res) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct drm_virtgpu_3d_wait waitcmd; + int ret; + + memset(&waitcmd, 0, sizeof(waitcmd)); + waitcmd.handle = res->bo_handle; + again: + ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_WAIT, &waitcmd); + if (ret == -EAGAIN) + goto again; +} + +static struct virgl_cmd_buf *virgl_drm_cmd_buf_create(struct virgl_winsys *qws) +{ + struct virgl_drm_cmd_buf *cbuf; + + cbuf = CALLOC_STRUCT(virgl_drm_cmd_buf); + if (!cbuf) + return NULL; + + cbuf->ws = qws; + + cbuf->nres = 512; + cbuf->res_bo = CALLOC(cbuf->nres, sizeof(struct virgl_hw_buf*)); + if (!cbuf->res_bo) { + FREE(cbuf); + return NULL; + } + cbuf->res_hlist = MALLOC(cbuf->nres * sizeof(uint32_t)); + if (!cbuf->res_hlist) { + FREE(cbuf->res_bo); + FREE(cbuf); + return NULL; + } + + cbuf->base.buf = cbuf->buf; + return &cbuf->base; +} + +static void virgl_drm_cmd_buf_destroy(struct virgl_cmd_buf *_cbuf) +{ + struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf); + + FREE(cbuf->res_hlist); + FREE(cbuf->res_bo); + FREE(cbuf); + +} + +static boolean virgl_drm_lookup_res(struct virgl_drm_cmd_buf *cbuf, + struct virgl_hw_res *res) +{ + unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1); + int i; + + if (cbuf->is_handle_added[hash]) { + i = cbuf->reloc_indices_hashlist[hash]; + if (cbuf->res_bo[i] == res) + return true; + + for (i = 0; i < cbuf->cres; i++) { + if (cbuf->res_bo[i] == res) { + cbuf->reloc_indices_hashlist[hash] = i; + return true; + } + } + } + return false; +} + +static void virgl_drm_add_res(struct virgl_drm_winsys *qdws, + struct virgl_drm_cmd_buf *cbuf, + struct virgl_hw_res *res) +{ + unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1); + + if (cbuf->cres > cbuf->nres) { + fprintf(stderr,"failure to add relocation\n"); + return; + } + + cbuf->res_bo[cbuf->cres] = NULL; + virgl_drm_resource_reference(qdws, &cbuf->res_bo[cbuf->cres], 
res); + cbuf->res_hlist[cbuf->cres] = res->bo_handle; + cbuf->is_handle_added[hash] = TRUE; + + cbuf->reloc_indices_hashlist[hash] = cbuf->cres; + p_atomic_inc(&res->num_cs_references); + cbuf->cres++; +} + +static void virgl_drm_release_all_res(struct virgl_drm_winsys *qdws, + struct virgl_drm_cmd_buf *cbuf) +{ + int i; + + for (i = 0; i < cbuf->cres; i++) { + p_atomic_dec(&cbuf->res_bo[i]->num_cs_references); + virgl_drm_resource_reference(qdws, &cbuf->res_bo[i], NULL); + } + cbuf->cres = 0; +} + +static void virgl_drm_emit_res(struct virgl_winsys *qws, + struct virgl_cmd_buf *_cbuf, + struct virgl_hw_res *res, boolean write_buf) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf); + boolean already_in_list = virgl_drm_lookup_res(cbuf, res); + + if (write_buf) + cbuf->base.buf[cbuf->base.cdw++] = res->res_handle; + + if (!already_in_list) + virgl_drm_add_res(qdws, cbuf, res); +} + +static boolean virgl_drm_res_is_ref(struct virgl_winsys *qws, + struct virgl_cmd_buf *_cbuf, + struct virgl_hw_res *res) +{ + if (!res->num_cs_references) + return FALSE; + + return TRUE; +} + +static int virgl_drm_winsys_submit_cmd(struct virgl_winsys *qws, + struct virgl_cmd_buf *_cbuf) +{ + struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); + struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf); + struct drm_virtgpu_execbuffer eb; + int ret; + + if (cbuf->base.cdw == 0) + return 0; + + memset(&eb, 0, sizeof(struct drm_virtgpu_execbuffer)); + eb.command = (unsigned long)(void*)cbuf->buf; + eb.size = cbuf->base.cdw * 4; + eb.num_bo_handles = cbuf->cres; + eb.bo_handles = (unsigned long)(void *)cbuf->res_hlist; + + ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb); + if (ret == -1) + fprintf(stderr,"got error from kernel - expect bad rendering %d\n", errno); + cbuf->base.cdw = 0; + + virgl_drm_release_all_res(qdws, cbuf); + + memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added)); + return 
ret; +} + +static int virgl_drm_get_caps(struct virgl_winsys *vws, + struct virgl_drm_caps *caps) +{ + struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws); + struct drm_virtgpu_get_caps args; + + memset(&args, 0, sizeof(args)); + + args.cap_set_id = 1; + args.addr = (unsigned long)&caps->caps; + args.size = sizeof(union virgl_caps); + return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_GET_CAPS, &args); +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static struct pipe_fence_handle * +virgl_cs_create_fence(struct virgl_winsys *vws) +{ + struct virgl_hw_res *res; + + res = virgl_drm_winsys_resource_cache_create(vws, + PIPE_BUFFER, + PIPE_FORMAT_R8_UNORM, + VIRGL_BIND_CUSTOM, + 8, 1, 1, 0, 0, 0, 8); + + return (struct pipe_fence_handle *)res; +} + +/* Wait for the resource backing a fence to become idle. + * Returns TRUE once the resource is no longer busy, FALSE if it is still + * busy when the timeout expires. timeout == 0 polls once without sleeping; + * PIPE_TIMEOUT_INFINITE blocks in virgl_drm_resource_wait(); any other + * value is divided by 1000 and compared against os_time_get() deltas. */ +static bool virgl_fence_wait(struct virgl_winsys *vws, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws); + struct virgl_hw_res *res = virgl_hw_res(fence); + + /* every other path returns TRUE for "idle", so the poll must negate "busy" */ + if (timeout == 0) + return !virgl_drm_resource_is_busy(vdws, res); + + if (timeout != PIPE_TIMEOUT_INFINITE) { + int64_t start_time = os_time_get(); + timeout /= 1000; + while (virgl_drm_resource_is_busy(vdws, res)) { + if (os_time_get() - start_time >= timeout) + return FALSE; + os_time_sleep(10); + } + return TRUE; + } + virgl_drm_resource_wait(vws, res); + return TRUE; +} + +static void virgl_fence_reference(struct virgl_winsys *vws, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src) +{ + struct virgl_drm_winsys *vdws = virgl_drm_winsys(vws); + virgl_drm_resource_reference(vdws, (struct virgl_hw_res **)dst, + virgl_hw_res(src)); +} + + +struct virgl_winsys * +virgl_drm_winsys_create(int drmFD) +{ + struct virgl_drm_winsys *qdws; + + qdws = CALLOC_STRUCT(virgl_drm_winsys); + if (!qdws) + return NULL; 
+ + qdws->fd = drmFD; + qdws->num_delayed = 0; + qdws->usecs = 1000000; + LIST_INITHEAD(&qdws->delayed); + pipe_mutex_init(qdws->mutex); + pipe_mutex_init(qdws->bo_handles_mutex); + qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare); + qdws->base.destroy = virgl_drm_winsys_destroy; + + qdws->base.transfer_put = virgl_bo_transfer_put; + qdws->base.transfer_get = virgl_bo_transfer_get; + qdws->base.resource_create = virgl_drm_winsys_resource_cache_create; + qdws->base.resource_unref = virgl_drm_winsys_resource_unref; + qdws->base.resource_create_from_handle = virgl_drm_winsys_resource_create_handle; + qdws->base.resource_get_handle = virgl_drm_winsys_resource_get_handle; + qdws->base.resource_map = virgl_drm_resource_map; + qdws->base.resource_wait = virgl_drm_resource_wait; + qdws->base.cmd_buf_create = virgl_drm_cmd_buf_create; + qdws->base.cmd_buf_destroy = virgl_drm_cmd_buf_destroy; + qdws->base.submit_cmd = virgl_drm_winsys_submit_cmd; + qdws->base.emit_res = virgl_drm_emit_res; + qdws->base.res_is_referenced = virgl_drm_res_is_ref; + + qdws->base.cs_create_fence = virgl_cs_create_fence; + qdws->base.fence_wait = virgl_fence_wait; + qdws->base.fence_reference = virgl_fence_reference; + + qdws->base.get_caps = virgl_drm_get_caps; + return &qdws->base; + +} diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h new file mode 100644 index 00000000000..da85ff87d2a --- /dev/null +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h @@ -0,0 +1,102 @@ +/* + * Copyright 2014, 2015 Red Hat. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_DRM_WINSYS_H +#define VIRGL_DRM_WINSYS_H + +#include <stdint.h> +#include "os/os_thread.h" +#include "pipe/p_state.h" +#include "util/list.h" + +#include "virgl/virgl_winsys.h" + +struct pipe_fence_handle; +struct util_hash_table; + +struct virgl_hw_res { + struct pipe_reference reference; + uint32_t res_handle; + uint32_t bo_handle; + uint32_t name; + int num_cs_references; + uint32_t size; + void *ptr; + uint32_t stride; + + struct list_head head; + uint32_t format; + uint32_t bind; + boolean cacheable; + int64_t start, end; + boolean flinked; + uint32_t flink; +}; + +struct virgl_drm_winsys +{ + struct virgl_winsys base; + int fd; + struct list_head delayed; + int num_delayed; + unsigned usecs; + pipe_mutex mutex; + + struct util_hash_table *bo_handles; + pipe_mutex bo_handles_mutex; +}; + +struct virgl_drm_cmd_buf { + struct virgl_cmd_buf base; + + uint32_t buf[VIRGL_MAX_CMDBUF_DWORDS]; + + unsigned nres; + unsigned cres; + struct virgl_hw_res **res_bo; + struct virgl_winsys *ws; + uint32_t *res_hlist; + + char is_handle_added[512]; + unsigned reloc_indices_hashlist[512]; + +}; + +static inline struct virgl_hw_res * +virgl_hw_res(struct pipe_fence_handle *f) +{ + return (struct virgl_hw_res *)f; +} + +static inline struct virgl_drm_winsys * +virgl_drm_winsys(struct virgl_winsys *iws) +{ + return (struct virgl_drm_winsys *)iws; +} + +static inline struct virgl_drm_cmd_buf * +virgl_drm_cmd_buf(struct virgl_cmd_buf *cbuf) +{ + return (struct virgl_drm_cmd_buf *)cbuf; +} + +#endif diff --git a/src/gallium/winsys/virgl/drm/virtgpu_drm.h b/src/gallium/winsys/virgl/drm/virtgpu_drm.h new file mode 100644 index 00000000000..30bc3afdd81 --- /dev/null +++ b/src/gallium/winsys/virgl/drm/virtgpu_drm.h @@ -0,0 +1,163 @@ +/* + * Copyright 2013 Red Hat + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRTGPU_DRM_H +#define VIRTGPU_DRM_H + +#include <stddef.h> +#include "drm.h" + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. 
+ * + * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel + * compatibility Keep fields aligned to their size + */ + +#define DRM_VIRTGPU_MAP 0x01 +#define DRM_VIRTGPU_EXECBUFFER 0x02 +#define DRM_VIRTGPU_GETPARAM 0x03 +#define DRM_VIRTGPU_RESOURCE_CREATE 0x04 +#define DRM_VIRTGPU_RESOURCE_INFO 0x05 +#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06 +#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 +#define DRM_VIRTGPU_WAIT 0x08 +#define DRM_VIRTGPU_GET_CAPS 0x09 + +struct drm_virtgpu_map { + uint64_t offset; /* use for mmap system call */ + uint32_t handle; + uint32_t pad; +}; + +struct drm_virtgpu_execbuffer { + uint32_t flags; /* for future use */ + uint32_t size; + uint64_t command; /* void* */ + uint64_t bo_handles; + uint32_t num_bo_handles; + uint32_t pad; +}; + +#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ + +struct drm_virtgpu_getparam { + uint64_t param; + uint64_t value; +}; + +/* NO_BO flags? NO resource flag? */ +/* resource flag for y_0_top */ +struct drm_virtgpu_resource_create { + uint32_t target; + uint32_t format; + uint32_t bind; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t array_size; + uint32_t last_level; + uint32_t nr_samples; + uint32_t flags; + uint32_t bo_handle; /* if this is set - recreate a new resource attached to this bo ? 
*/ + uint32_t res_handle; /* returned by kernel */ + uint32_t size; /* validate transfer in the host */ + uint32_t stride; /* validate transfer in the host */ +}; + +struct drm_virtgpu_resource_info { + uint32_t bo_handle; + uint32_t res_handle; + uint32_t size; + uint32_t stride; +}; + +struct drm_virtgpu_3d_box { + uint32_t x, y, z; + uint32_t w, h, d; +}; + +struct drm_virtgpu_3d_transfer_to_host { + uint32_t bo_handle; + struct drm_virtgpu_3d_box box; + uint32_t level; + uint32_t offset; +}; + +struct drm_virtgpu_3d_transfer_from_host { + uint32_t bo_handle; + struct drm_virtgpu_3d_box box; + uint32_t level; + uint32_t offset; +}; + +#define VIRTGPU_WAIT_NOWAIT 1 /* like it */ +struct drm_virtgpu_3d_wait { + uint32_t handle; /* 0 is an invalid handle */ + uint32_t flags; +}; + +struct drm_virtgpu_get_caps { + uint32_t cap_set_id; + uint32_t cap_set_ver; + uint64_t addr; + uint32_t size; + uint32_t pad; +}; + +#define DRM_IOCTL_VIRTGPU_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) + +#define DRM_IOCTL_VIRTGPU_EXECBUFFER \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ + struct drm_virtgpu_execbuffer) + +#define DRM_IOCTL_VIRTGPU_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM,\ + struct drm_virtgpu_getparam) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE, \ + struct drm_virtgpu_resource_create) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \ + struct drm_virtgpu_resource_info) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST, \ + struct drm_virtgpu_3d_transfer_from_host) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST, \ + struct drm_virtgpu_3d_transfer_to_host) + +#define DRM_IOCTL_VIRTGPU_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, \ + struct drm_virtgpu_3d_wait) + 
+#define DRM_IOCTL_VIRTGPU_GET_CAPS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ + struct drm_virtgpu_get_caps) + +#endif diff --git a/src/gallium/winsys/virgl/vtest/Makefile.am b/src/gallium/winsys/virgl/vtest/Makefile.am new file mode 100644 index 00000000000..b15a3afd60c --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/Makefile.am @@ -0,0 +1,32 @@ +# Copyright © 2015 Red Hat +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_WINSYS_CFLAGS) + +noinst_LTLIBRARIES = libvirglvtest.la + +libvirglvtest_la_SOURCES = $(C_SOURCES) diff --git a/src/gallium/winsys/virgl/vtest/Makefile.sources b/src/gallium/winsys/virgl/vtest/Makefile.sources new file mode 100644 index 00000000000..12370d96fa8 --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/Makefile.sources @@ -0,0 +1,6 @@ +C_SOURCES := \ + virgl_vtest_public.h \ + virgl_vtest_socket.c \ + virgl_vtest_winsys.c \ + virgl_vtest_winsys.h \ + vtest_protocol.h diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h b/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h new file mode 100644 index 00000000000..47379d9c735 --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_public.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_VTEST_PUBLIC_H +#define VIRGL_VTEST_PUBLIC_H + +struct virgl_winsys; +struct sw_winsys; + +struct virgl_winsys *virgl_vtest_winsys_wrap(struct sw_winsys *sws); + +#endif diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c new file mode 100644 index 00000000000..4541419d8e8 --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -0,0 +1,293 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <sys/socket.h> +#include <errno.h> +#include <stdio.h> +#include <netinet/in.h> +#include <sys/un.h> +#include <unistd.h> + +#include <os/os_process.h> +#include <util/u_format.h> +/* connect to remote socket */ +#define VTEST_SOCKET_NAME "/tmp/.virgl_test" + +#include "virgl_vtest_winsys.h" +#include "virgl_vtest_public.h" + +/* block read/write routines */ +static int virgl_block_write(int fd, void *buf, int size) +{ + void *ptr = buf; + int left; + int ret; + left = size; + do { + ret = write(fd, ptr, left); + if (ret < 0) + return -errno; + left -= ret; + ptr += ret; + } while (left); + return size; +} + +static int virgl_block_read(int fd, void *buf, int size) +{ + void *ptr = buf; + int left; + int ret; + left = size; + do { + ret = read(fd, ptr, left); + if (ret <= 0) { + fprintf(stderr, + "lost connection to rendering server on %d read %d %d\n", + size, ret, errno); + abort(); + return ret < 0 ? -errno : 0; + } + left -= ret; + ptr += ret; + } while (left); + return size; +} + +static int virgl_vtest_send_init(struct virgl_vtest_winsys *vws) +{ + uint32_t buf[VTEST_HDR_SIZE]; + const char *nstr = "virtest"; + char cmdline[64]; + int ret; + + ret = os_get_process_name(cmdline, 63); + if (ret == FALSE) + strcpy(cmdline, nstr); +#if defined(__GLIBC__) || defined(__CYGWIN__) + if (!strcmp(cmdline, "shader_runner")) { + const char *name; + /* hack to get better testname */ + name = program_invocation_short_name; + name += strlen(name) + 1; + strncpy(cmdline, name, 63); + } +#endif + buf[VTEST_CMD_LEN] = strlen(cmdline) + 1; + buf[VTEST_CMD_ID] = VCMD_CREATE_RENDERER; + + virgl_block_write(vws->sock_fd, &buf, sizeof(buf)); + virgl_block_write(vws->sock_fd, (void *)cmdline, strlen(cmdline) + 1); + return 0; +} + +int virgl_vtest_connect(struct virgl_vtest_winsys *vws) +{ + struct sockaddr_un un; + int sock, ret; + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) + return -1; + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + 
snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_SOCKET_NAME); + + do { + ret = 0; + if (connect(sock, (struct sockaddr *)&un, sizeof(un)) < 0) { + ret = -errno; + } + } while (ret == -EINTR); + + vws->sock_fd = sock; + virgl_vtest_send_init(vws); + return 0; +} + +int virgl_vtest_send_get_caps(struct virgl_vtest_winsys *vws, + struct virgl_drm_caps *caps) +{ + uint32_t get_caps_buf[VTEST_HDR_SIZE]; + uint32_t resp_buf[VTEST_HDR_SIZE]; + + int ret; + get_caps_buf[VTEST_CMD_LEN] = 0; + get_caps_buf[VTEST_CMD_ID] = VCMD_GET_CAPS; + + virgl_block_write(vws->sock_fd, &get_caps_buf, sizeof(get_caps_buf)); + + ret = virgl_block_read(vws->sock_fd, resp_buf, sizeof(resp_buf)); + if (ret <= 0) + return 0; + + ret = virgl_block_read(vws->sock_fd, &caps->caps, sizeof(union virgl_caps)); + + return 0; +} + +int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws, + uint32_t handle, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples) +{ + uint32_t res_create_buf[VCMD_RES_CREATE_SIZE], vtest_hdr[VTEST_HDR_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE; + + res_create_buf[VCMD_RES_CREATE_RES_HANDLE] = handle; + res_create_buf[VCMD_RES_CREATE_TARGET] = target; + res_create_buf[VCMD_RES_CREATE_FORMAT] = format; + res_create_buf[VCMD_RES_CREATE_BIND] = bind; + res_create_buf[VCMD_RES_CREATE_WIDTH] = width; + res_create_buf[VCMD_RES_CREATE_HEIGHT] = height; + res_create_buf[VCMD_RES_CREATE_DEPTH] = depth; + res_create_buf[VCMD_RES_CREATE_ARRAY_SIZE] = array_size; + res_create_buf[VCMD_RES_CREATE_LAST_LEVEL] = last_level; + res_create_buf[VCMD_RES_CREATE_NR_SAMPLES] = nr_samples; + + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &res_create_buf, sizeof(res_create_buf)); + + return 0; +} + +int 
virgl_vtest_submit_cmd(struct virgl_vtest_winsys *vws, + struct virgl_vtest_cmd_buf *cbuf) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = cbuf->base.cdw; + vtest_hdr[VTEST_CMD_ID] = VCMD_SUBMIT_CMD; + + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, cbuf->buf, cbuf->base.cdw * 4); + return 0; +} + +int virgl_vtest_send_resource_unref(struct virgl_vtest_winsys *vws, + uint32_t handle) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t cmd[1]; + vtest_hdr[VTEST_CMD_LEN] = 1; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_UNREF; + + cmd[0] = handle; + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd)); + return 0; +} + +/* Send the header of a transfer command (vcmd is the command id, e.g. + * VCMD_TRANSFER_PUT) describing the resource handle, level, strides, box + * and payload size in bytes. Only the header is written here; for a PUT + * the payload itself is sent separately. Always returns 0. */ +int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, + uint32_t vcmd, + uint32_t handle, + uint32_t level, uint32_t stride, + uint32_t layer_stride, + const struct pipe_box *box, + uint32_t data_size) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t cmd[VCMD_TRANSFER_HDR_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_TRANSFER_HDR_SIZE; + vtest_hdr[VTEST_CMD_ID] = vcmd; + + /* the length field counts dwords: round the byte payload up to whole dwords */ + if (vcmd == VCMD_TRANSFER_PUT) + vtest_hdr[VTEST_CMD_LEN] += (data_size + 3) / 4; + + cmd[0] = handle; + cmd[1] = level; + cmd[2] = stride; + cmd[3] = layer_stride; + cmd[4] = box->x; + cmd[5] = box->y; + cmd[6] = box->z; + cmd[7] = box->width; + cmd[8] = box->height; + cmd[9] = box->depth; + cmd[10] = data_size; + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd)); + + return 0; +} + +int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys *vws, + void *data, + uint32_t data_size) +{ + return virgl_block_write(vws->sock_fd, data, data_size); +} + +int virgl_vtest_recv_transfer_get_data(struct virgl_vtest_winsys *vws, + void *data, + uint32_t data_size, + uint32_t stride, + const struct pipe_box *box, + uint32_t format) +{ + void *line; + void *ptr = data; + int 
hblocks = util_format_get_nblocksy(format, box->height); + + line = malloc(stride); + while (hblocks) { + virgl_block_read(vws->sock_fd, line, stride); + memcpy(ptr, line, util_format_get_stride(format, box->width)); + ptr += stride; + hblocks--; + } + free(line); + return 0; +} + +int virgl_vtest_busy_wait(struct virgl_vtest_winsys *vws, int handle, + int flags) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t cmd[VCMD_BUSY_WAIT_SIZE]; + uint32_t result[1]; + int ret; + vtest_hdr[VTEST_CMD_LEN] = VCMD_BUSY_WAIT_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_BUSY_WAIT; + cmd[VCMD_BUSY_WAIT_HANDLE] = handle; + cmd[VCMD_BUSY_WAIT_FLAGS] = flags; + + virgl_block_write(vws->sock_fd, &vtest_hdr, sizeof(vtest_hdr)); + virgl_block_write(vws->sock_fd, &cmd, sizeof(cmd)); + + ret = virgl_block_read(vws->sock_fd, vtest_hdr, sizeof(vtest_hdr)); + assert(ret); + ret = virgl_block_read(vws->sock_fd, result, sizeof(result)); + assert(ret); + return result[0]; +} diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c new file mode 100644 index 00000000000..b19c4561493 --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c @@ -0,0 +1,666 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "os/os_time.h" +#include "state_tracker/sw_winsys.h" + +#include "virgl_vtest_winsys.h" +#include "virgl_vtest_public.h" + +static void *virgl_vtest_resource_map(struct virgl_winsys *vws, + struct virgl_hw_res *res); +static void virgl_vtest_resource_unmap(struct virgl_winsys *vws, + struct virgl_hw_res *res); + +static inline boolean can_cache_resource(struct virgl_hw_res *res) +{ + return res->cacheable == TRUE; +} + +static uint32_t vtest_get_transfer_size(struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t level, uint32_t *valid_stride_p) +{ + uint32_t valid_stride, valid_layer_stride; + + valid_stride = util_format_get_stride(res->format, box->width); + if (stride) { + if (box->height > 1) + valid_stride = stride; + } + + valid_layer_stride = util_format_get_2d_size(res->format, valid_stride, + box->height); + if (layer_stride) { + if (box->depth > 1) + valid_layer_stride = layer_stride; + } + + *valid_stride_p = valid_stride; + return valid_layer_stride * box->depth; +} + +static int +virgl_vtest_transfer_put(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + uint32_t size; + void *ptr; + uint32_t valid_stride; + + size 
= vtest_get_transfer_size(res, box, stride, layer_stride, level, + &valid_stride); + + virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_PUT, res->res_handle, + level, stride, layer_stride, + box, size); + ptr = virgl_vtest_resource_map(vws, res); + virgl_vtest_send_transfer_put_data(vtws, ptr + buf_offset, size); + virgl_vtest_resource_unmap(vws, res); + return 0; +} + +static int +virgl_vtest_transfer_get(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + uint32_t size; + void *ptr; + uint32_t valid_stride; + + size = vtest_get_transfer_size(res, box, stride, layer_stride, level, + &valid_stride); + + virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_GET, res->res_handle, + level, stride, layer_stride, + box, size); + + + ptr = virgl_vtest_resource_map(vws, res); + virgl_vtest_recv_transfer_get_data(vtws, ptr + buf_offset, size, + valid_stride, box, res->format); + virgl_vtest_resource_unmap(vws, res); + return 0; +} + +static void virgl_hw_res_destroy(struct virgl_vtest_winsys *vtws, + struct virgl_hw_res *res) +{ + virgl_vtest_send_resource_unref(vtws, res->res_handle); + if (res->dt) + vtws->sws->displaytarget_destroy(vtws->sws, res->dt); + free(res->ptr); + FREE(res); +} + +static boolean virgl_vtest_resource_is_busy(struct virgl_vtest_winsys *vtws, + struct virgl_hw_res *res) +{ + /* implement busy check */ + int ret; + ret = virgl_vtest_busy_wait(vtws, res->res_handle, 0); + + if (ret < 0) + return FALSE; + + return ret == 1 ? 
TRUE : FALSE; +} + +static void +virgl_cache_flush(struct virgl_vtest_winsys *vtws) +{ + struct list_head *curr, *next; + struct virgl_hw_res *res; + + pipe_mutex_lock(vtws->mutex); + curr = vtws->delayed.next; + next = curr->next; + + while (curr != &vtws->delayed) { + res = LIST_ENTRY(struct virgl_hw_res, curr, head); + LIST_DEL(&res->head); + virgl_hw_res_destroy(vtws, res); + curr = next; + next = curr->next; + } + pipe_mutex_unlock(vtws->mutex); +} + +static void +virgl_cache_list_check_free(struct virgl_vtest_winsys *vtws) +{ + struct list_head *curr, *next; + struct virgl_hw_res *res; + int64_t now; + + now = os_time_get(); + curr = vtws->delayed.next; + next = curr->next; + while (curr != &vtws->delayed) { + res = LIST_ENTRY(struct virgl_hw_res, curr, head); + if (!os_time_timeout(res->start, res->end, now)) + break; + + LIST_DEL(&res->head); + virgl_hw_res_destroy(vtws, res); + curr = next; + next = curr->next; + } +} + +static void virgl_vtest_resource_reference(struct virgl_vtest_winsys *vtws, + struct virgl_hw_res **dres, + struct virgl_hw_res *sres) +{ + struct virgl_hw_res *old = *dres; + if (pipe_reference(&(*dres)->reference, &sres->reference)) { + if (!can_cache_resource(old)) { + virgl_hw_res_destroy(vtws, old); + } else { + pipe_mutex_lock(vtws->mutex); + virgl_cache_list_check_free(vtws); + + old->start = os_time_get(); + old->end = old->start + vtws->usecs; + LIST_ADDTAIL(&old->head, &vtws->delayed); + vtws->num_delayed++; + pipe_mutex_unlock(vtws->mutex); + } + } + *dres = sres; +} + +static struct virgl_hw_res * +virgl_vtest_winsys_resource_create(struct virgl_winsys *vws, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples, + uint32_t size) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + struct virgl_hw_res *res; + static int handle = 1; + + res = CALLOC_STRUCT(virgl_hw_res); + if 
(!res) + return NULL; + + if (bind & (VIRGL_BIND_DISPLAY_TARGET | VIRGL_BIND_SCANOUT)) { + res->dt = vtws->sws->displaytarget_create(vtws->sws, bind, format, + width, height, 64, NULL, + &res->stride); + + } else { + res->ptr = align_malloc(size, 64); + if (!res->ptr) { + FREE(res); + return NULL; + } + } + + res->bind = bind; + res->format = format; + res->height = height; + res->width = width; + virgl_vtest_send_resource_create(vtws, handle, target, format, bind, + width, height, depth, array_size, + last_level, nr_samples); + + res->res_handle = handle++; + pipe_reference_init(&res->reference, 1); + return res; +} + +static void virgl_vtest_winsys_resource_unref(struct virgl_winsys *vws, + struct virgl_hw_res *hres) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + virgl_vtest_resource_reference(vtws, &hres, NULL); +} + +static void *virgl_vtest_resource_map(struct virgl_winsys *vws, + struct virgl_hw_res *res) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + + if (res->dt) { + return vtws->sws->displaytarget_map(vtws->sws, res->dt, 0); + } else { + res->mapped = res->ptr; + return res->mapped; + } +} + +static void virgl_vtest_resource_unmap(struct virgl_winsys *vws, + struct virgl_hw_res *res) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + if (res->mapped) + res->mapped = NULL; + + if (res->dt) + vtws->sws->displaytarget_unmap(vtws->sws, res->dt); +} + +static void virgl_vtest_resource_wait(struct virgl_winsys *vws, + struct virgl_hw_res *res) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + + virgl_vtest_busy_wait(vtws, res->res_handle, VCMD_BUSY_WAIT_FLAG_WAIT); +} + +static inline int virgl_is_res_compat(struct virgl_vtest_winsys *vtws, + struct virgl_hw_res *res, + uint32_t size, uint32_t bind, + uint32_t format) +{ + if (res->bind != bind) + return 0; + if (res->format != format) + return 0; + if (res->size < size) + return 0; + if (res->size > size * 2) + return 0; + + if 
(virgl_vtest_resource_is_busy(vtws, res)) { + return -1; + } + + return 1; +} + +/* Return a resource for the request, reusing a compatible idle entry from + * the delayed (cache) list when bind is a constant/index/vertex/custom + * buffer, and destroying timed-out entries met while scanning. Falls back + * to a fresh allocation; returns NULL if that allocation fails. */ +static struct virgl_hw_res * +virgl_vtest_winsys_resource_cache_create(struct virgl_winsys *vws, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples, + uint32_t size) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + struct virgl_hw_res *res, *curr_res; + struct list_head *curr, *next; + int64_t now; + /* initialised so the checks below are defined when the list is empty */ + int ret = 0; + + /* only store binds for vertex/index/const buffers */ + if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER && + bind != VIRGL_BIND_VERTEX_BUFFER && bind != VIRGL_BIND_CUSTOM) + goto alloc; + + pipe_mutex_lock(vtws->mutex); + + res = NULL; + curr = vtws->delayed.next; + next = curr->next; + + now = os_time_get(); + while (curr != &vtws->delayed) { + curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head); + + /* parenthesised so ret keeps the -1/0/1 result rather than the comparison */ + if (!res && ((ret = virgl_is_res_compat(vtws, curr_res, size, bind, format)) > 0)) + res = curr_res; + else if (os_time_timeout(curr_res->start, curr_res->end, now)) { + LIST_DEL(&curr_res->head); + virgl_hw_res_destroy(vtws, curr_res); + } else + break; + + if (ret == -1) + break; + + curr = next; + next = curr->next; + } + + if (!res && ret != -1) { + while (curr != &vtws->delayed) { + curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head); + ret = virgl_is_res_compat(vtws, curr_res, size, bind, format); + if (ret > 0) { + res = curr_res; + break; + } + if (ret == -1) + break; + curr = next; + next = curr->next; + } + } + + if (res) { + LIST_DEL(&res->head); + --vtws->num_delayed; + pipe_mutex_unlock(vtws->mutex); + pipe_reference_init(&res->reference, 1); + return res; + } + + pipe_mutex_unlock(vtws->mutex); + +alloc: + res = virgl_vtest_winsys_resource_create(vws, target, format, bind, + width, height, depth, array_size, + last_level, nr_samples, size); + /* allocation can fail; do not touch res->cacheable on NULL */ + if (!res) + return NULL; + if (bind == 
VIRGL_BIND_CONSTANT_BUFFER || bind == VIRGL_BIND_INDEX_BUFFER || + bind == VIRGL_BIND_VERTEX_BUFFER) + res->cacheable = TRUE; + return res; +} + +static struct virgl_cmd_buf *virgl_vtest_cmd_buf_create(struct virgl_winsys *vws) +{ + struct virgl_vtest_cmd_buf *cbuf; + + cbuf = CALLOC_STRUCT(virgl_vtest_cmd_buf); + if (!cbuf) + return NULL; + + cbuf->nres = 512; + cbuf->res_bo = CALLOC(cbuf->nres, sizeof(struct virgl_hw_buf*)); + if (!cbuf->res_bo) { + FREE(cbuf); + return NULL; + } + cbuf->ws = vws; + cbuf->base.buf = cbuf->buf; + return &cbuf->base; +} + +static void virgl_vtest_cmd_buf_destroy(struct virgl_cmd_buf *_cbuf) +{ + struct virgl_vtest_cmd_buf *cbuf = virgl_vtest_cmd_buf(_cbuf); + + FREE(cbuf->res_bo); + FREE(cbuf); +} + +static boolean virgl_vtest_lookup_res(struct virgl_vtest_cmd_buf *cbuf, + struct virgl_hw_res *res) +{ + unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1); + int i; + + if (cbuf->is_handle_added[hash]) { + i = cbuf->reloc_indices_hashlist[hash]; + if (cbuf->res_bo[i] == res) + return true; + + for (i = 0; i < cbuf->cres; i++) { + if (cbuf->res_bo[i] == res) { + cbuf->reloc_indices_hashlist[hash] = i; + return true; + } + } + } + return false; +} + +static void virgl_vtest_release_all_res(struct virgl_vtest_winsys *vtws, + struct virgl_vtest_cmd_buf *cbuf) +{ + int i; + + for (i = 0; i < cbuf->cres; i++) { + p_atomic_dec(&cbuf->res_bo[i]->num_cs_references); + virgl_vtest_resource_reference(vtws, &cbuf->res_bo[i], NULL); + } + cbuf->cres = 0; +} + +static void virgl_vtest_add_res(struct virgl_vtest_winsys *vtws, + struct virgl_vtest_cmd_buf *cbuf, + struct virgl_hw_res *res) +{ + unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1); + + if (cbuf->cres > cbuf->nres) { + fprintf(stderr,"failure to add relocation\n"); + return; + } + + cbuf->res_bo[cbuf->cres] = NULL; + virgl_vtest_resource_reference(vtws, &cbuf->res_bo[cbuf->cres], res); + cbuf->is_handle_added[hash] = TRUE; + + 
cbuf->reloc_indices_hashlist[hash] = cbuf->cres; + p_atomic_inc(&res->num_cs_references); + cbuf->cres++; +} + +static int virgl_vtest_winsys_submit_cmd(struct virgl_winsys *vws, + struct virgl_cmd_buf *_cbuf) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + struct virgl_vtest_cmd_buf *cbuf = virgl_vtest_cmd_buf(_cbuf); + int ret; + + if (cbuf->base.cdw == 0) + return 0; + + ret = virgl_vtest_submit_cmd(vtws, cbuf); + + virgl_vtest_release_all_res(vtws, cbuf); + memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added)); + cbuf->base.cdw = 0; + return ret; +} + +static void virgl_vtest_emit_res(struct virgl_winsys *vws, + struct virgl_cmd_buf *_cbuf, + struct virgl_hw_res *res, boolean write_buf) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + struct virgl_vtest_cmd_buf *cbuf = virgl_vtest_cmd_buf(_cbuf); + boolean already_in_list = virgl_vtest_lookup_res(cbuf, res); + + if (write_buf) + cbuf->base.buf[cbuf->base.cdw++] = res->res_handle; + if (!already_in_list) + virgl_vtest_add_res(vtws, cbuf, res); +} + +static boolean virgl_vtest_res_is_ref(struct virgl_winsys *vws, + struct virgl_cmd_buf *_cbuf, + struct virgl_hw_res *res) +{ + if (!res->num_cs_references) + return FALSE; + + return TRUE; +} + +static int virgl_vtest_get_caps(struct virgl_winsys *vws, + struct virgl_drm_caps *caps) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + return virgl_vtest_send_get_caps(vtws, caps); +} + +static struct pipe_fence_handle * +virgl_cs_create_fence(struct virgl_winsys *vws) +{ + struct virgl_hw_res *res; + + res = virgl_vtest_winsys_resource_cache_create(vws, + PIPE_BUFFER, + PIPE_FORMAT_R8_UNORM, + PIPE_BIND_CUSTOM, + 8, 1, 1, 0, 0, 0, 8); + + return (struct pipe_fence_handle *)res; +} + +static bool virgl_fence_wait(struct virgl_winsys *vws, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct virgl_vtest_winsys *vdws = virgl_vtest_winsys(vws); + struct virgl_hw_res *res = virgl_hw_res(fence); + + if 
(timeout == 0) + return virgl_vtest_resource_is_busy(vdws, res); + + if (timeout != PIPE_TIMEOUT_INFINITE) { + int64_t start_time = os_time_get(); + timeout /= 1000; + while (virgl_vtest_resource_is_busy(vdws, res)) { + if (os_time_get() - start_time >= timeout) + return FALSE; + os_time_sleep(10); + } + return TRUE; + } + virgl_vtest_resource_wait(vws, res); + return TRUE; +} + +static void virgl_fence_reference(struct virgl_winsys *vws, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src) +{ + struct virgl_vtest_winsys *vdws = virgl_vtest_winsys(vws); + virgl_vtest_resource_reference(vdws, (struct virgl_hw_res **)dst, + virgl_hw_res(src)); +} + +static void virgl_vtest_flush_frontbuffer(struct virgl_winsys *vws, + struct virgl_hw_res *res, + unsigned level, unsigned layer, + void *winsys_drawable_handle, + struct pipe_box *sub_box) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + struct pipe_box box; + void *map; + uint32_t size; + uint32_t offset = 0, valid_stride; + if (!res->dt) + return; + + memset(&box, 0, sizeof(box)); + + if (sub_box) { + box = *sub_box; + offset = box.y / util_format_get_blockheight(res->format) * res->stride + + box.x / util_format_get_blockwidth(res->format) * util_format_get_blocksize(res->format); + } else { + box.z = layer; + box.width = res->width; + box.height = res->height; + box.depth = 1; + } + + size = vtest_get_transfer_size(res, &box, res->stride, 0, level, &valid_stride); + + virgl_vtest_busy_wait(vtws, res->res_handle, VCMD_BUSY_WAIT_FLAG_WAIT); + map = vtws->sws->displaytarget_map(vtws->sws, res->dt, 0); + + /* execute a transfer */ + virgl_vtest_send_transfer_cmd(vtws, VCMD_TRANSFER_GET, res->res_handle, + level, res->stride, 0, &box, size); + virgl_vtest_recv_transfer_get_data(vtws, map + offset, size, valid_stride, + &box, res->format); + vtws->sws->displaytarget_unmap(vtws->sws, res->dt); + + vtws->sws->displaytarget_display(vtws->sws, res->dt, winsys_drawable_handle, + sub_box); +} + 
+static void +virgl_vtest_winsys_destroy(struct virgl_winsys *vws) +{ + struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); + + virgl_cache_flush(vtws); + + pipe_mutex_destroy(vtws->mutex); + FREE(vtws); +} + +struct virgl_winsys * +virgl_vtest_winsys_wrap(struct sw_winsys *sws) +{ + struct virgl_vtest_winsys *vtws; + + vtws = CALLOC_STRUCT(virgl_vtest_winsys); + if (!vtws) + return NULL; + + virgl_vtest_connect(vtws); + vtws->sws = sws; + + vtws->usecs = 1000000; + LIST_INITHEAD(&vtws->delayed); + pipe_mutex_init(vtws->mutex); + + vtws->base.destroy = virgl_vtest_winsys_destroy; + + vtws->base.transfer_put = virgl_vtest_transfer_put; + vtws->base.transfer_get = virgl_vtest_transfer_get; + + vtws->base.resource_create = virgl_vtest_winsys_resource_cache_create; + vtws->base.resource_unref = virgl_vtest_winsys_resource_unref; + vtws->base.resource_map = virgl_vtest_resource_map; + vtws->base.resource_wait = virgl_vtest_resource_wait; + vtws->base.cmd_buf_create = virgl_vtest_cmd_buf_create; + vtws->base.cmd_buf_destroy = virgl_vtest_cmd_buf_destroy; + vtws->base.submit_cmd = virgl_vtest_winsys_submit_cmd; + + vtws->base.emit_res = virgl_vtest_emit_res; + vtws->base.res_is_referenced = virgl_vtest_res_is_ref; + vtws->base.get_caps = virgl_vtest_get_caps; + + vtws->base.cs_create_fence = virgl_cs_create_fence; + vtws->base.fence_wait = virgl_fence_wait; + vtws->base.fence_reference = virgl_fence_reference; + + vtws->base.flush_frontbuffer = virgl_vtest_flush_frontbuffer; + + return &vtws->base; +} diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h new file mode 100644 index 00000000000..b4faa70b67e --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.h @@ -0,0 +1,149 @@ +/* + * Copyright 2014, 2015 Red Hat. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_DRM_WINSYS_H +#define VIRGL_DRM_WINSYS_H + +#include <stdint.h> +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/list.h" +#include "os/os_thread.h" + +#include "virgl/virgl_winsys.h" +#include "vtest_protocol.h" + +struct pipe_fence_handle; +struct sw_winsys; +struct sw_displaytarget; + +struct virgl_vtest_winsys { + struct virgl_winsys base; + + struct sw_winsys *sws; + + /* fd to remote renderer */ + int sock_fd; + + struct list_head delayed; + int num_delayed; + unsigned usecs; + pipe_mutex mutex; +}; + +struct virgl_hw_res { + struct pipe_reference reference; + uint32_t res_handle; + int num_cs_references; + + void *ptr; + int size; + + uint32_t format; + uint32_t stride; + uint32_t width; + uint32_t height; + + struct sw_displaytarget *dt; + void *mapped; + + struct list_head head; + uint32_t bind; + boolean cacheable; + int64_t start, end; + +}; + +struct virgl_vtest_cmd_buf { + struct virgl_cmd_buf base; + uint32_t buf[VIRGL_MAX_CMDBUF_DWORDS]; + unsigned nres; + unsigned cres; + struct virgl_winsys *ws; + struct virgl_hw_res **res_bo; + + char is_handle_added[512]; + unsigned reloc_indices_hashlist[512]; +}; + +static inline struct virgl_hw_res * +virgl_hw_res(struct pipe_fence_handle *f) +{ + return (struct virgl_hw_res *)f; +} + +static inline struct virgl_vtest_winsys * +virgl_vtest_winsys(struct virgl_winsys *iws) +{ + return (struct virgl_vtest_winsys *)iws; +} + +static inline struct virgl_vtest_cmd_buf * +virgl_vtest_cmd_buf(struct virgl_cmd_buf *cbuf) +{ + return (struct virgl_vtest_cmd_buf *)cbuf; +} + + +int virgl_vtest_connect(struct virgl_vtest_winsys *vws); +int virgl_vtest_send_get_caps(struct virgl_vtest_winsys *vws, + struct virgl_drm_caps *caps); + +int virgl_vtest_send_resource_create(struct virgl_vtest_winsys *vws, + uint32_t handle, + enum pipe_texture_target target, + uint32_t format, + uint32_t bind, + uint32_t width, + uint32_t height, + uint32_t depth, + 
uint32_t array_size, + uint32_t last_level, + uint32_t nr_samples); + +int virgl_vtest_send_resource_unref(struct virgl_vtest_winsys *vws, + uint32_t handle); +int virgl_vtest_submit_cmd(struct virgl_vtest_winsys *vtws, + struct virgl_vtest_cmd_buf *cbuf); + +int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, + uint32_t vcmd, + uint32_t handle, + uint32_t level, uint32_t stride, + uint32_t layer_stride, + const struct pipe_box *box, + uint32_t data_size); + +int virgl_vtest_send_transfer_put_data(struct virgl_vtest_winsys *vws, + void *data, + uint32_t data_size); +int virgl_vtest_recv_transfer_get_data(struct virgl_vtest_winsys *vws, + void *data, + uint32_t data_size, + uint32_t stride, + const struct pipe_box *box, + uint32_t format); + +int virgl_vtest_busy_wait(struct virgl_vtest_winsys *vws, int handle, + int flags); +#endif diff --git a/src/gallium/winsys/virgl/vtest/vtest_protocol.h b/src/gallium/winsys/virgl/vtest/vtest_protocol.h new file mode 100644 index 00000000000..86d197f006c --- /dev/null +++ b/src/gallium/winsys/virgl/vtest/vtest_protocol.h @@ -0,0 +1,88 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VTEST_PROTOCOL +#define VTEST_PROTOCOL + +#define VTEST_DEFAULT_SOCKET_NAME "/tmp/.virgl_test" + +/* 32-bit length field */ +/* 32-bit cmd field */ +#define VTEST_HDR_SIZE 2 +#define VTEST_CMD_LEN 0 /* length of data */ +#define VTEST_CMD_ID 1 +#define VTEST_CMD_DATA_START 2 + +/* vtest cmds */ +#define VCMD_GET_CAPS 1 + +#define VCMD_RESOURCE_CREATE 2 +#define VCMD_RESOURCE_UNREF 3 + +#define VCMD_TRANSFER_GET 4 +#define VCMD_TRANSFER_PUT 5 + +#define VCMD_SUBMIT_CMD 6 + +#define VCMD_RESOURCE_BUSY_WAIT 7 + +/* pass the process cmd line for debugging */ +#define VCMD_CREATE_RENDERER 8 +/* get caps */ +/* 0 length cmd */ +/* resp VCMD_GET_CAPS + caps */ + +#define VCMD_RES_CREATE_SIZE 10 +#define VCMD_RES_CREATE_RES_HANDLE 0 +#define VCMD_RES_CREATE_TARGET 1 +#define VCMD_RES_CREATE_FORMAT 2 +#define VCMD_RES_CREATE_BIND 3 +#define VCMD_RES_CREATE_WIDTH 4 +#define VCMD_RES_CREATE_HEIGHT 5 +#define VCMD_RES_CREATE_DEPTH 6 +#define VCMD_RES_CREATE_ARRAY_SIZE 7 +#define VCMD_RES_CREATE_LAST_LEVEL 8 +#define VCMD_RES_CREATE_NR_SAMPLES 9 + +#define VCMD_RES_UNREF_SIZE 1 +#define VCMD_RES_UNREF_RES_HANDLE 0 + +#define VCMD_TRANSFER_HDR_SIZE 11 +#define VCMD_TRANSFER_RES_HANDLE 0 +#define VCMD_TRANSFER_LEVEL 1 +#define VCMD_TRANSFER_STRIDE 2 +#define VCMD_TRANSFER_LAYER_STRIDE 3 +#define VCMD_TRANSFER_X 4 +#define VCMD_TRANSFER_Y 5 +#define VCMD_TRANSFER_Z 6 +#define VCMD_TRANSFER_WIDTH 7 +#define VCMD_TRANSFER_HEIGHT 8 +#define VCMD_TRANSFER_DEPTH 9 +#define 
VCMD_TRANSFER_DATA_SIZE 10 + +#define VCMD_BUSY_WAIT_FLAG_WAIT 1 + +#define VCMD_BUSY_WAIT_SIZE 2 +#define VCMD_BUSY_WAIT_HANDLE 0 +#define VCMD_BUSY_WAIT_FLAGS 1 + +#endif diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index 2708e50a45c..8db2153e84b 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -35,6 +35,7 @@ extern "C" { #define __GBM__ 1 +#include <stddef.h> #include <stdint.h> /** diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 961183636a9..0a79fb14633 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2423,21 +2423,6 @@ validate_explicit_location(const struct ast_type_qualifier *qual, const struct gl_context *const ctx = state->ctx; unsigned max_loc = qual->location + var->type->uniform_locations() - 1; - /* ARB_explicit_uniform_location specification states: - * - * "The explicitly defined locations and the generated locations - * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one." - * - * "Valid locations for default-block uniform variable locations - * are in the range of 0 to the implementation-defined maximum - * number of uniform locations." - */ - if (qual->location < 0) { - _mesa_glsl_error(loc, state, - "explicit location < 0 for uniform %s", var->name); - return; - } - if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " ">= MAX_UNIFORM_LOCATIONS (%u)", var->name, @@ -2528,41 +2513,30 @@ validate_explicit_location(const struct ast_type_qualifier *qual, } else { var->data.explicit_location = true; - /* This bit of silliness is needed because invalid explicit locations - * are supposed to be flagged during linking. Small negative values - * biased by VERT_ATTRIB_GENERIC0 or FRAG_RESULT_DATA0 could alias - * built-in values (e.g., -16+VERT_ATTRIB_GENERIC0 = VERT_ATTRIB_POS). - * The linker needs to be able to differentiate these cases. This - * ensures that negative values stay negative. 
- */ - if (qual->location >= 0) { - switch (state->stage) { - case MESA_SHADER_VERTEX: - var->data.location = (var->data.mode == ir_var_shader_in) - ? (qual->location + VERT_ATTRIB_GENERIC0) - : (qual->location + VARYING_SLOT_VAR0); - break; + switch (state->stage) { + case MESA_SHADER_VERTEX: + var->data.location = (var->data.mode == ir_var_shader_in) + ? (qual->location + VERT_ATTRIB_GENERIC0) + : (qual->location + VARYING_SLOT_VAR0); + break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - if (var->data.patch) - var->data.location = qual->location + VARYING_SLOT_PATCH0; - else - var->data.location = qual->location + VARYING_SLOT_VAR0; - break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.patch) + var->data.location = qual->location + VARYING_SLOT_PATCH0; + else + var->data.location = qual->location + VARYING_SLOT_VAR0; + break; - case MESA_SHADER_FRAGMENT: - var->data.location = (var->data.mode == ir_var_shader_out) - ? (qual->location + FRAG_RESULT_DATA0) - : (qual->location + VARYING_SLOT_VAR0); - break; - case MESA_SHADER_COMPUTE: - assert(!"Unexpected shader type"); - break; - } - } else { - var->data.location = qual->location; + case MESA_SHADER_FRAGMENT: + var->data.location = (var->data.mode == ir_var_shader_out) + ? 
(qual->location + FRAG_RESULT_DATA0) + : (qual->location + VARYING_SLOT_VAR0); + break; + case MESA_SHADER_COMPUTE: + assert(!"Unexpected shader type"); + break; } if (qual->flags.q.explicit_index) { diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index aae25f893e8..509a57b8813 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -401,6 +401,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state) } static bool +shader_clock(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_clock_enable; +} + +static bool shader_storage_buffer_object(const _mesa_glsl_parse_state *state) { return state->has_shader_storage_buffer_objects(); @@ -782,6 +788,11 @@ private: ir_function_signature *_memory_barrier( builtin_available_predicate avail); + ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_shader_clock(builtin_available_predicate avail, + const glsl_type *type); + #undef B0 #undef B1 #undef B2 @@ -952,6 +963,11 @@ builtin_builder::create_intrinsics() add_function("__intrinsic_memory_barrier", _memory_barrier_intrinsic(shader_image_load_store), NULL); + + add_function("__intrinsic_shader_clock", + _shader_clock_intrinsic(shader_clock, + glsl_type::uvec2_type), + NULL); } /** @@ -2741,6 +2757,11 @@ builtin_builder::create_builtins() _memory_barrier(shader_image_load_store), NULL); + add_function("clock2x32ARB", + _shader_clock(shader_clock, + glsl_type::uvec2_type), + NULL); + #undef F #undef FI #undef FIUD @@ -5251,6 +5272,28 @@ builtin_builder::_memory_barrier(builtin_available_predicate avail) return sig; } +ir_function_signature * +builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_INTRINSIC(type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock(builtin_available_predicate avail, + const glsl_type *type) +{ + 
MAKE_SIG(type, avail, 0); + + ir_variable *retval = body.make_temp(type, "clock_retval"); + + body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"), + retval, sig->parameters)); + body.emit(ret(retval)); + return sig; +} + /** @} */ /******************************************************************************/ diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index a6ad1050552..c30fb9226e5 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -710,7 +710,7 @@ builtin_variable_generator::generate_constants() } } - if (state->is_version(430, 0) || state->ARB_compute_shader_enable) { + if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); @@ -887,16 +887,22 @@ builtin_variable_generator::generate_uniforms() void builtin_variable_generator::generate_vs_special_vars() { + ir_variable *var; + if (state->is_version(130, 300)) add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID"); if (state->ARB_draw_instanced_enable) add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB"); if (state->ARB_draw_instanced_enable || state->is_version(140, 300)) add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID"); - if (state->AMD_vertex_shader_layer_enable) - add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - if (state->AMD_vertex_shader_viewport_index_enable) - add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + if (state->AMD_vertex_shader_layer_enable) { + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->AMD_vertex_shader_viewport_index_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = 
INTERP_QUALIFIER_FLAT; + } if (compatibility) { add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex"); add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal"); @@ -954,9 +960,14 @@ builtin_variable_generator::generate_tes_special_vars() void builtin_variable_generator::generate_gs_special_vars() { - add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - if (state->is_version(410, 0) || state->ARB_viewport_array_enable) - add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + ir_variable *var; + + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + if (state->is_version(410, 0) || state->ARB_viewport_array_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); @@ -970,7 +981,6 @@ builtin_variable_generator::generate_gs_special_vars() * the specific case of gl_PrimitiveIDIn. So we don't need to treat * gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable. 
*/ - ir_variable *var; var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn"); var->data.interpolation = INTERP_QUALIFIER_FLAT; var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); @@ -984,14 +994,15 @@ builtin_variable_generator::generate_gs_special_vars() void builtin_variable_generator::generate_fs_special_vars() { + ir_variable *var; + add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord"); add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing"); if (state->is_version(120, 100)) add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); if (state->is_version(150, 0)) { - ir_variable *var = - add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); var->data.interpolation = INTERP_QUALIFIER_FLAT; } @@ -1043,8 +1054,10 @@ builtin_variable_generator::generate_fs_special_vars() } if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { - add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; } } diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 1d7a3af8b74..4acccf74065 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2426,6 +2426,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_shader_bit_encoding) add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1); + if (extensions->ARB_shader_clock) + add_builtin_define(parser, "GL_ARB_shader_clock", 1); + if (extensions->ARB_uniform_buffer_object) add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1); diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 2f2e10d7992..4636435f191 
100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -948,7 +948,8 @@ parameter_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!state->has_420pack() && $2.flags.i != 0) + if (!(state->has_420pack() || state->is_version(420, 310)) && + $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); $$ = $2; @@ -1847,7 +1848,8 @@ type_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!state->has_420pack() && $2.flags.i != 0) + if (!(state->has_420pack() || state->is_version(420, 310)) && + $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); $$ = $2; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 692b1228ee9..f856a200e09 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -606,6 +606,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_separate_shader_objects, true, false, dummy_true), EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), + EXT(ARB_shader_clock, true, false, ARB_shader_clock), EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), EXT(ARB_shader_precision, true, false, ARB_shader_precision), diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index e8740f9ecb9..b54c5359149 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -519,6 +519,8 @@ struct _mesa_glsl_parse_state { bool ARB_shader_atomic_counters_warn; bool ARB_shader_bit_encoding_enable; bool ARB_shader_bit_encoding_warn; + bool ARB_shader_clock_enable; + bool ARB_shader_clock_warn; bool ARB_shader_image_load_store_enable; bool 
ARB_shader_image_load_store_warn; bool ARB_shader_image_size_enable; diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp index 70ef0e1c891..cdcc06d53e2 100644 --- a/src/glsl/link_atomics.cpp +++ b/src/glsl/link_atomics.cpp @@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, struct gl_shader_program *prog) { unsigned num_buffers; + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {}; active_atomic_buffer *abs = find_active_atomic_counters(ctx, prog, &num_buffers); @@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, } /* Assign stage-specific fields. */ - for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) - mab.StageReferences[j] = - (ab.stage_references[j] ? GL_TRUE : GL_FALSE); + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (ab.stage_references[j]) { + mab.StageReferences[j] = GL_TRUE; + num_atomic_buffers[j]++; + } else { + mab.StageReferences[j] = GL_FALSE; + } + } i++; } + /* Store a list pointers to atomic buffers per stage and store the index + * to the intra-stage buffer list in uniform storage. 
+ */ + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) { + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j]; + prog->_LinkedShaders[j]->AtomicBuffers = + rzalloc_array(prog, gl_active_atomic_buffer *, + num_atomic_buffers[j]); + + unsigned intra_stage_idx = 0; + for (unsigned i = 0; i < num_buffers; i++) { + struct gl_active_atomic_buffer *atomic_buffer = + &prog->AtomicBuffers[i]; + if (atomic_buffer->StageReferences[j]) { + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] = + atomic_buffer; + + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) { + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index = + intra_stage_idx; + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active = + true; + } + + intra_stage_idx++; + } + } + } + } + delete [] abs; assert(i == num_buffers); } diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index 5285d8d01e4..d5d30bb0a0d 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -100,7 +100,7 @@ private: virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *, const unsigned packing, - bool /* last_field */) + bool last_field) { assert(this->index < this->num_variables); @@ -131,12 +131,28 @@ private: unsigned alignment = 0; unsigned size = 0; + /* From ARB_program_interface_query: + * + * "If the final member of an active shader storage block is array + * with no declared size, the minimum buffer size is computed + * assuming the array was declared as an array with one element." + * + * For that reason, we use the base type of the unsized array to calculate + * its size. We don't need to check if the unsized array is the last member + * of a shader storage block (that check was already done by the parser). 
+ */ + const glsl_type *type_for_size = type; + if (type->is_unsized_array()) { + assert(last_field); + type_for_size = type->without_array(); + } + if (packing == GLSL_INTERFACE_PACKING_STD430) { alignment = type->std430_base_alignment(v->RowMajor); - size = type->std430_size(v->RowMajor); + size = type_for_size->std430_size(v->RowMajor); } else { alignment = type->std140_base_alignment(v->RowMajor); - size = type->std140_size(v->RowMajor); + size = type_for_size->std140_size(v->RowMajor); } this->offset = glsl_align(this->offset, alignment); diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 8183e65d2f5..47bb7717f84 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -1010,38 +1010,37 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) } } -/** - * Scan the program for image uniforms and store image unit access - * information into the gl_shader data structure. - */ static void -link_set_image_access_qualifiers(struct gl_shader_program *prog) +link_set_image_access_qualifiers(struct gl_shader_program *prog, + gl_shader *sh, unsigned shader_stage, + ir_variable *var, const glsl_type *type, + char **name, size_t name_length) { - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; + /* Handle arrays of arrays */ + if (type->is_array() && type->fields.array->is_array()) { + for (unsigned i = 0; i < type->length; i++) { + size_t new_length = name_length; - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); - if (var && var->data.mode == ir_var_uniform && - var->type->contains_image()) { - unsigned id = 0; - bool found = prog->UniformHash->get(id, var->name); - assert(found); - (void) found; - const gl_uniform_storage *storage = &prog->UniformStorage[id]; - const unsigned index 
= storage->opaque[i].index; - const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : - var->data.image_write_only ? GL_WRITE_ONLY : - GL_READ_WRITE); - - for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) - sh->ImageAccess[index + j] = access; - } + link_set_image_access_qualifiers(prog, sh, shader_stage, var, + type->fields.array, name, + new_length); } + } else { + unsigned id = 0; + bool found = prog->UniformHash->get(id, *name); + assert(found); + (void) found; + const gl_uniform_storage *storage = &prog->UniformStorage[id]; + const unsigned index = storage->opaque[shader_stage].index; + const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : + var->data.image_write_only ? GL_WRITE_ONLY : + GL_READ_WRITE); + + for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) + sh->ImageAccess[index + j] = access; } } @@ -1305,7 +1304,29 @@ link_assign_uniform_locations(struct gl_shader_program *prog, prog->NumHiddenUniforms = hidden_uniforms; prog->UniformStorage = uniforms; - link_set_image_access_qualifiers(prog); + /** + * Scan the program for image uniforms and store image unit access + * information into the gl_shader data structure. 
+ */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (var && var->data.mode == ir_var_uniform && + var->type->contains_image()) { + char *name_copy = ralloc_strdup(NULL, var->name); + link_set_image_access_qualifiers(prog, sh, i, var, var->type, + &name_copy, strlen(var->name)); + ralloc_free(name_copy); + } + } + } + link_set_uniform_initializers(prog, boolean_true); return; diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 07ea0e0c7e5..c35d87acea6 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2282,6 +2282,22 @@ resize_tes_inputs(struct gl_context *ctx, foreach_in_list(ir_instruction, ir, tes->ir) { ir->accept(&input_resize_visitor); } + + if (tcs) { + /* Convert the gl_PatchVerticesIn system value into a constant, since + * the value is known at this point. + */ + foreach_in_list(ir_instruction, ir, tes->ir) { + ir_variable *var = ir->as_variable(); + if (var && var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_VERTICES_IN) { + void *mem_ctx = ralloc_parent(var); + var->data.mode = ir_var_auto; + var->data.location = 0; + var->constant_value = new(mem_ctx) ir_constant(num_vertices); + } + } + } } /** @@ -3137,7 +3153,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg, GLenum type, const char *name) { bool found_interface = false; - const char *block_name = NULL; + unsigned block_name_len = 0; + const char *block_name_dot = strchr(name, '.'); /* These rules only apply to buffer variables. So we return * true for the rest of types. 
@@ -3146,8 +3163,28 @@ should_add_buffer_variable(struct gl_shader_program *shProg, return true; for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - block_name = shProg->BufferInterfaceBlocks[i].Name; - if (strncmp(block_name, name, strlen(block_name)) == 0) { + const char *block_name = shProg->BufferInterfaceBlocks[i].Name; + block_name_len = strlen(block_name); + + const char *block_square_bracket = strchr(block_name, '['); + if (block_square_bracket) { + /* The block is part of an array of named interfaces, + * for the name comparison we ignore the "[x]" part. + */ + block_name_len -= strlen(block_square_bracket); + } + + if (block_name_dot) { + /* Check if the variable name starts with the interface + * name. The interface name (if present) should have the + * same length as the interface block name we are comparing to. + */ + unsigned len = strlen(name) - strlen(block_name_dot); + if (len != block_name_len) + continue; + } + + if (strncmp(block_name, name, block_name_len) == 0) { found_interface = true; break; } @@ -3157,7 +3194,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg, * including the dot that follows it. */ if (found_interface) - name = name + strlen(block_name) + 1; + name = name + block_name_len + 1; /* From: ARB_program_interface_query extension: * @@ -3166,14 +3203,14 @@ should_add_buffer_variable(struct gl_shader_program *shProg, * of its type. For arrays of aggregate types, the enumeration rules are * applied recursively for the single enumerated array element. 
*/ - const char *first_dot = strchr(name, '.'); + const char *struct_first_dot = strchr(name, '.'); const char *first_square_bracket = strchr(name, '['); /* The buffer variable is on top level and it is not an array */ if (!first_square_bracket) { return true; /* The shader storage block member is a struct, then generate the entry */ - } else if (first_dot && first_dot < first_square_bracket) { + } else if (struct_first_dot && struct_first_dot < first_square_bracket) { return true; } else { /* Shader storage block member is an array, only generate an entry for the @@ -3349,6 +3386,12 @@ add_interface_variables(struct gl_shader_program *shProg, if (strncmp(var->name, "packed:", 7) == 0) continue; + /* Skip fragdata arrays, these are handled separately + * by add_fragdata_arrays. + */ + if (strncmp(var->name, "gl_out_FragData", 15) == 0) + continue; + if (!add_program_resource(shProg, programInterface, var, build_stageref(shProg, var->name, var->data.mode) | mask)) @@ -3388,6 +3431,26 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage) return true; } +static bool +add_fragdata_arrays(struct gl_shader_program *shProg) +{ + struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + if (!sh || !sh->fragdata_arrays) + return true; + + foreach_in_list(ir_instruction, node, sh->fragdata_arrays) { + ir_variable *var = node->as_variable(); + if (var) { + assert(var->data.mode == ir_var_shader_out); + if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, var, + 1 << MESA_SHADER_FRAGMENT)) + return false; + } + } + return true; +} + static char* get_top_level_name(const char *name) { @@ -3467,80 +3530,78 @@ is_top_level_shader_storage_block_member(const char* name, return result; } -static void -calculate_array_size(struct gl_shader_program *shProg, - struct gl_uniform_storage *uni) +static int +get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field, + char *interface_name, char *var_name) { - int block_index = 
uni->block_index; - int array_size = -1; - char *var_name = get_top_level_name(uni->name); - char *interface_name = - get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); - - if (strcmp(var_name, interface_name) == 0) { - /* Deal with instanced array of SSBOs */ - char *temp_name = get_var_name(uni->name); - free(var_name); - var_name = get_top_level_name(temp_name); - free(temp_name); - } - - for (unsigned i = 0; i < shProg->NumShaders; i++) { - if (shProg->Shaders[i] == NULL) - continue; - - const gl_shader *stage = shProg->Shaders[i]; - foreach_in_list(ir_instruction, node, stage->ir) { - ir_variable *var = node->as_variable(); - if (!var || !var->get_interface_type() || - var->data.mode != ir_var_shader_storage) - continue; - - const glsl_type *interface = var->get_interface_type(); - - if (strcmp(interface_name, interface->name) != 0) - continue; - - for (unsigned i = 0; i < interface->length; i++) { - const glsl_struct_field *field = &interface->fields.structure[i]; - if (strcmp(field->name, var_name) != 0) - continue; - /* From GL_ARB_program_interface_query spec: - * - * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer - * identifying the number of active array elements of the top-level - * shader storage block member containing to the active variable is - * written to <params>. If the top-level block member is not - * declared as an array, the value one is written to <params>. If - * the top-level block member is an array with no declared size, - * the value zero is written to <params>. 
- */ - if (is_top_level_shader_storage_block_member(uni->name, - interface_name, - var_name)) - array_size = 1; - else if (field->type->is_unsized_array()) - array_size = 0; - else if (field->type->is_array()) - array_size = field->type->length; - else - array_size = 1; + /* From GL_ARB_program_interface_query spec: + * + * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer + * identifying the number of active array elements of the top-level + * shader storage block member containing to the active variable is + * written to <params>. If the top-level block member is not + * declared as an array, the value one is written to <params>. If + * the top-level block member is an array with no declared size, + * the value zero is written to <params>. + */ + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 1; + else if (field->type->is_unsized_array()) + return 0; + else if (field->type->is_array()) + return field->type->length; + + return 1; +} - goto found_top_level_array_size; - } +static int +get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface, + const glsl_struct_field *field, char *interface_name, + char *var_name) +{ + /* From GL_ARB_program_interface_query: + * + * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer + * identifying the stride between array elements of the top-level + * shader storage block member containing the active variable is + * written to <params>. For top-level block members declared as + * arrays, the value written is the difference, in basic machine + * units, between the offsets of the active variable for + * consecutive elements in the top-level array. For top-level + * block members not declared as an array, zero is written to + * <params>." 
+ */ + if (field->type->is_array()) { + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(field->matrix_layout); + bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + const glsl_type *array_type = field->type->fields.array; + + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 0; + + if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { + if (array_type->is_record() || array_type->is_array()) + return glsl_align(array_type->std140_size(row_major), 16); + else + return MAX2(array_type->std140_base_alignment(row_major), 16); + } else { + return array_type->std430_array_stride(row_major); } } -found_top_level_array_size: - free(interface_name); - free(var_name); - uni->top_level_array_size = array_size; + return 0; } static void -calculate_array_stride(struct gl_shader_program *shProg, - struct gl_uniform_storage *uni) +calculate_array_size_and_stride(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) { int block_index = uni->block_index; + int array_size = -1; int array_stride = -1; char *var_name = get_top_level_name(uni->name); char *interface_name = @@ -3549,9 +3610,17 @@ calculate_array_stride(struct gl_shader_program *shProg, if (strcmp(var_name, interface_name) == 0) { /* Deal with instanced array of SSBOs */ char *temp_name = get_var_name(uni->name); + if (!temp_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } free(var_name); var_name = get_top_level_name(temp_name); free(temp_name); + if (!var_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } } for (unsigned i = 0; i < shProg->NumShaders; i++) { @@ -3567,61 +3636,26 @@ calculate_array_stride(struct gl_shader_program *shProg, const glsl_type *interface = var->get_interface_type(); - if (strcmp(interface_name, interface->name) != 0) { + if (strcmp(interface_name, 
interface->name) != 0) continue; - } for (unsigned i = 0; i < interface->length; i++) { const glsl_struct_field *field = &interface->fields.structure[i]; if (strcmp(field->name, var_name) != 0) continue; - /* From GL_ARB_program_interface_query: - * - * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer - * identifying the stride between array elements of the top-level - * shader storage block member containing the active variable is - * written to <params>. For top-level block members declared as - * arrays, the value written is the difference, in basic machine - * units, between the offsets of the active variable for - * consecutive elements in the top-level array. For top-level - * block members not declared as an array, zero is written to - * <params>." - */ - if (field->type->is_array()) { - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(field->matrix_layout); - bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; - const glsl_type *array_type = field->type->fields.array; - - if (is_top_level_shader_storage_block_member(uni->name, - interface_name, - var_name)) { - array_stride = 0; - goto found_top_level_array_stride; - } - if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { - if (array_type->is_record() || array_type->is_array()) { - array_stride = array_type->std140_size(row_major); - array_stride = glsl_align(array_stride, 16); - } else { - unsigned element_base_align = 0; - element_base_align = array_type->std140_base_alignment(row_major); - array_stride = MAX2(element_base_align, 16); - } - } else { - array_stride = array_type->std430_array_stride(row_major); - } - } else { - array_stride = 0; - } - goto found_top_level_array_stride; + + array_stride = get_array_stride(uni, interface, field, + interface_name, var_name); + array_size = get_array_size(uni, field, interface_name, var_name); + goto write_top_level_array_size_and_stride; } } } -found_top_level_array_stride: 
+write_top_level_array_size_and_stride: free(interface_name); free(var_name); uni->top_level_array_stride = array_stride; + uni->top_level_array_size = array_size; } /** @@ -3664,6 +3698,9 @@ build_program_resource_list(struct gl_shader_program *shProg) return; } + if (!add_fragdata_arrays(shProg)) + return; + /* Add inputs and outputs to the resource list. */ if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir, GL_PROGRAM_INPUT)) @@ -3709,8 +3746,7 @@ build_program_resource_list(struct gl_shader_program *shProg) continue; if (is_shader_storage) { - calculate_array_size(shProg, &shProg->UniformStorage[i]); - calculate_array_stride(shProg, &shProg->UniformStorage[i]); + calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]); } if (!add_program_resource(shProg, type, diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 129dd02781b..ba14bbbeb6a 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -306,6 +306,7 @@ nir_visitor::visit(ir_variable *ir) var->data.read_only = ir->data.read_only; var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; + var->data.patch = ir->data.patch; var->data.invariant = ir->data.invariant; var->data.location = ir->data.location; @@ -396,8 +397,6 @@ nir_visitor::visit(ir_variable *ir) var->data.index = ir->data.index; var->data.descriptor_set = 0; var->data.binding = ir->data.binding; - /* XXX Get rid of buffer_index */ - var->data.atomic.buffer_index = ir->data.binding; var->data.atomic.offset = ir->data.atomic.offset; var->data.image.read_only = ir->data.image_read_only; var->data.image.write_only = ir->data.image_write_only; @@ -722,6 +721,8 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_exchange; } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { op = nir_intrinsic_ssbo_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { + op 
= nir_intrinsic_shader_clock; } else { unreachable("not reached"); } @@ -826,6 +827,10 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_memory_barrier: nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); break; + case nir_intrinsic_shader_clock: + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); + break; case nir_intrinsic_store_ssbo: { exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 793bdafb54b..5f03095d673 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1557,12 +1557,14 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_num_work_groups; case SYSTEM_VALUE_PRIMITIVE_ID: return nir_intrinsic_load_primitive_id; - /* FINISHME: Add tessellation intrinsics. case SYSTEM_VALUE_TESS_COORD: - case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_tess_coord; case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return nir_intrinsic_load_tess_level_outer; case SYSTEM_VALUE_TESS_LEVEL_INNER: - */ + return nir_intrinsic_load_tess_level_inner; + case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_patch_vertices_in; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1598,13 +1600,14 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_WORK_GROUP_ID; case nir_intrinsic_load_primitive_id: return SYSTEM_VALUE_PRIMITIVE_ID; - /* FINISHME: Add tessellation intrinsics. 
+ case nir_intrinsic_load_tess_coord: return SYSTEM_VALUE_TESS_COORD; - return SYSTEM_VALUE_VERTICES_IN; - return SYSTEM_VALUE_PRIMITIVE_ID; + case nir_intrinsic_load_tess_level_outer: return SYSTEM_VALUE_TESS_LEVEL_OUTER; + case nir_intrinsic_load_tess_level_inner: return SYSTEM_VALUE_TESS_LEVEL_INNER; - */ + case nir_intrinsic_load_patch_vertices_in: + return SYSTEM_VALUE_VERTICES_IN; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 229d534bf3d..9b278d6a767 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -171,6 +171,7 @@ typedef struct { unsigned read_only:1; unsigned centroid:1; unsigned sample:1; + unsigned patch:1; unsigned invariant:1; /** @@ -313,7 +314,6 @@ typedef struct { * Location an atomic counter is stored at. */ struct { - unsigned buffer_index; unsigned offset; } atomic; @@ -2016,7 +2016,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); void nir_lower_two_sided_color(nir_shader *shader); -void nir_lower_atomics(nir_shader *shader); +void nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program); void nir_lower_to_source_mods(nir_shader *shader); bool nir_lower_gs_intrinsics(nir_shader *shader); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index b2ceff566cf..9fd91de157f 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -83,6 +83,14 @@ BARRIER(discard) */ BARRIER(memory_barrier) +/* + * Shader clock intrinsic with semantics analogous to the clock2x32ARB() + * GLSL intrinsic. + * The latter can be used as code motion barrier, which is currently not + * feasible with NIR. + */ +INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE) + /** A conditional discard, with a single boolean source. 
*/ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) @@ -217,6 +225,10 @@ SYSTEM_VALUE(sample_pos, 2, 0) SYSTEM_VALUE(sample_mask_in, 1, 0) SYSTEM_VALUE(primitive_id, 1, 0) SYSTEM_VALUE(invocation_id, 1, 0) +SYSTEM_VALUE(tess_coord, 3, 0) +SYSTEM_VALUE(tess_level_outer, 4, 0) +SYSTEM_VALUE(tess_level_inner, 2, 0) +SYSTEM_VALUE(patch_vertices_in, 1, 0) SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index 46e137652a1..40ca3de96cf 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -25,17 +25,24 @@ * */ +#include "ir_uniform.h" #include "nir.h" #include "main/config.h" #include <assert.h> +typedef struct { + const struct gl_shader_program *shader_program; + nir_shader *shader; +} lower_atomic_state; + /* * replace atomic counter intrinsics that use a variable with intrinsics * that directly store the buffer index and byte offset */ static void -lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) +lower_instr(nir_intrinsic_instr *instr, + lower_atomic_state *state) { nir_intrinsic_op op; switch (instr->intrinsic) { @@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); + unsigned uniform_loc = instr->variables[0]->var->data.location; nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); new_instr->const_index[0] = - (int) instr->variables[0]->var->data.atomic.buffer_index; + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset; @@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state) { 
nir_foreach_instr_safe(block, instr) { if (instr->type == nir_instr_type_intrinsic) - lower_instr(nir_instr_as_intrinsic(instr), state); + lower_instr(nir_instr_as_intrinsic(instr), + (lower_atomic_state *) state); } return true; } void -nir_lower_atomics(nir_shader *shader) +nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program) { + lower_atomic_state state = { + .shader = shader, + .shader_program = shader_program, + }; + nir_foreach_overload(shader, overload) { if (overload->impl) { - nir_foreach_block(overload->impl, lower_block, overload->impl); + nir_foreach_block(overload->impl, lower_block, (void *) &state); nir_metadata_preserve(overload->impl, nir_metadata_block_index | nir_metadata_dominance); } diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index f2d584fe484..3c0f1da94af 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") +# Saturated vector add for 4 8bit ints. +binop("usadd_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; +} +""") + +# Saturated vector subtract for 4 8bit ints. +binop("ussub_4x8", tint, "", """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + if (src0_chan > src1_chan) + dst |= (src0_chan - src1_chan) << i; +} +""") + +# vector min for 4 8bit ints. +binop("umin_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# vector max for 4 8bit ints. 
+binop("umax_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# unorm multiply: (a * b) / 255. +binop("umul_unorm_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + dst |= ((src0_chan * src1_chan) / 255) << i; +} +""") + binop("fpow", tfloat, "", "powf(src0, src1)") binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index cafbd6d66a5..30ede52b146 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -56,12 +56,16 @@ optimizations = [ (('iabs', ('ineg', a)), ('iabs', a)), (('fadd', a, 0.0), a), (('iadd', a, 0), a), + (('usadd_4x8', a, 0), a), + (('usadd_4x8', a, ~0), ~0), (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), (('fadd', ('fneg', a), a), 0.0), (('iadd', ('ineg', a), a), 0), (('fmul', a, 0.0), 0.0), (('imul', a, 0), 0), + (('umul_unorm_4x8', a, 0), 0), + (('umul_unorm_4x8', a, ~0), a), (('fmul', a, 1.0), a), (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), @@ -202,6 +206,8 @@ optimizations = [ # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('ussub_4x8', a, 0), a), + (('ussub_4x8', a, ~0), 0), (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 09663996869..30220c5e48d 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -228,12 +228,13 @@ print_var_decl(nir_variable *var, print_state *state) const char *const 
cent = (var->data.centroid) ? "centroid " : ""; const char *const samp = (var->data.sample) ? "sample " : ""; + const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? "invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", "uniform ", "shader_storage", "system " }; - fprintf(fp, "%s%s%s%s%s ", - cent, samp, inv, mode[var->data.mode], + fprintf(fp, "%s%s%s%s%s%s ", + cent, samp, patch, inv, mode[var->data.mode], glsl_interp_qualifier_name(var->data.interpolation)); glsl_print_type(var->type, fp); diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp index 31719d20c05..68b70eedf92 100644 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ b/src/glsl/opt_dead_builtin_varyings.cpp @@ -269,14 +269,14 @@ public: */ class replace_varyings_visitor : public ir_rvalue_visitor { public: - replace_varyings_visitor(exec_list *ir, + replace_varyings_visitor(struct gl_shader *sha, const varying_info_visitor *info, unsigned external_texcoord_usage, unsigned external_color_usage, bool external_has_fog) - : info(info), new_fog(NULL) + : shader(sha), info(info), new_fog(NULL) { - void *const ctx = ir; + void *const ctx = shader->ir; memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); @@ -293,14 +293,16 @@ public: * occurrences of gl_TexCoord will be replaced with. */ if (info->lower_texcoord_array) { - prepare_array(ir, this->new_texcoord, ARRAY_SIZE(this->new_texcoord), + prepare_array(shader->ir, this->new_texcoord, + ARRAY_SIZE(this->new_texcoord), VARYING_SLOT_TEX0, "TexCoord", mode_str, info->texcoord_usage, external_texcoord_usage); } /* Handle gl_FragData in the same way like gl_TexCoord. 
*/ if (info->lower_fragdata_array) { - prepare_array(ir, this->new_fragdata, ARRAY_SIZE(this->new_fragdata), + prepare_array(shader->ir, this->new_fragdata, + ARRAY_SIZE(this->new_fragdata), FRAG_RESULT_DATA0, "FragData", mode_str, info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); } @@ -340,7 +342,7 @@ public: } /* Now do the replacing. */ - visit_list_elements(this, ir); + visit_list_elements(this, shader->ir); } void prepare_array(exec_list *ir, @@ -389,6 +391,13 @@ public: /* Remove the gl_FragData array. */ if (this->info->lower_fragdata_array && var == this->info->fragdata_array) { + + /* Clone variable for program resource list before it is removed. */ + if (!shader->fragdata_arrays) + shader->fragdata_arrays = new (shader) exec_list; + + shader->fragdata_arrays->push_tail(var->clone(shader, NULL)); + var->remove(); } @@ -487,6 +496,7 @@ public: } private: + struct gl_shader *shader; const varying_info_visitor *info; ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; @@ -498,20 +508,20 @@ private: } /* anonymous namespace */ static void -lower_texcoord_array(exec_list *ir, const varying_info_visitor *info) +lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) { - replace_varyings_visitor(ir, info, + replace_varyings_visitor(shader, info, (1 << MAX_TEXTURE_COORD_UNITS) - 1, 1 | 2, true); } static void -lower_fragdata_array(exec_list *ir) +lower_fragdata_array(struct gl_shader *shader) { varying_info_visitor info(ir_var_shader_out, true); - info.get(ir, 0, NULL); + info.get(shader->ir, 0, NULL); - replace_varyings_visitor(ir, &info, 0, 0, 0); + replace_varyings_visitor(shader, &info, 0, 0, 0); } @@ -523,7 +533,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, { /* Lower the gl_FragData array to separate variables. 
*/ if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { - lower_fragdata_array(consumer->ir); + lower_fragdata_array(consumer); } /* Lowering of built-in varyings has no effect with the core context and @@ -544,7 +554,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (!consumer) { /* At least eliminate unused gl_TexCoord elements. */ if (producer_info.lower_texcoord_array) { - lower_texcoord_array(producer->ir, &producer_info); + lower_texcoord_array(producer, &producer_info); } return; } @@ -556,7 +566,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (!producer) { /* At least eliminate unused gl_TexCoord elements. */ if (consumer_info.lower_texcoord_array) { - lower_texcoord_array(consumer->ir, &consumer_info); + lower_texcoord_array(consumer, &consumer_info); } return; } @@ -566,7 +576,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (producer_info.lower_texcoord_array || producer_info.color_usage || producer_info.has_fog) { - replace_varyings_visitor(producer->ir, + replace_varyings_visitor(producer, &producer_info, consumer_info.texcoord_usage, consumer_info.color_usage, @@ -587,7 +597,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (consumer_info.lower_texcoord_array || consumer_info.color_usage || consumer_info.has_fog) { - replace_varyings_visitor(consumer->ir, + replace_varyings_visitor(consumer, &consumer_info, producer_info.texcoord_usage, producer_info.color_usage, diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c index 749ceb08aac..76cc3214b7b 100644 --- a/src/glx/drisw_glx.c +++ b/src/glx/drisw_glx.c @@ -177,9 +177,9 @@ swrastPutImage(__DRIdrawable * draw, int op, } static void -swrastGetImage(__DRIdrawable * read, - int x, int y, int w, int h, - char *data, void *loaderPrivate) +swrastGetImage2(__DRIdrawable * read, + int x, int y, int w, int h, int stride, + char *data, void *loaderPrivate) { struct drisw_drawable *prp = loaderPrivate; __GLXDRIdrawable *pread = &(prp->base); @@ -193,20 +193,29 @@ 
swrastGetImage(__DRIdrawable * read, ximage->data = data; ximage->width = w; ximage->height = h; - ximage->bytes_per_line = bytes_per_line(w * ximage->bits_per_pixel, 32); + ximage->bytes_per_line = stride ? stride : bytes_per_line(w * ximage->bits_per_pixel, 32); XGetSubImage(dpy, readable, x, y, w, h, ~0L, ZPixmap, ximage, 0, 0); ximage->data = NULL; } +static void +swrastGetImage(__DRIdrawable * read, + int x, int y, int w, int h, + char *data, void *loaderPrivate) +{ + swrastGetImage2(read, x, y, w, h, 0, data, loaderPrivate); +} + static const __DRIswrastLoaderExtension swrastLoaderExtension = { - .base = {__DRI_SWRAST_LOADER, 2 }, + .base = {__DRI_SWRAST_LOADER, 3 }, .getDrawableInfo = swrastGetDrawableInfo, .putImage = swrastPutImage, .getImage = swrastGetImage, .putImage2 = swrastPutImage2, + .getImage2 = swrastGetImage2, }; static const __DRIextension *loader_extensions[] = { diff --git a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml index 120bda13dd8..72aa62c7751 100644 --- a/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml +++ b/src/mapi/glapi/gen/ARB_draw_elements_base_vertex.xml @@ -8,7 +8,7 @@ <category name="GL_ARB_draw_elements_base_vertex" number="62"> - <function name="DrawElementsBaseVertex" exec="dynamic"> + <function name="DrawElementsBaseVertex" es2="3.2" exec="dynamic"> <param name="mode" type="GLenum"/> <param name="count" type="GLsizei"/> <param name="type" type="GLenum"/> @@ -16,7 +16,7 @@ <param name="basevertex" type="GLint"/> </function> - <function name="DrawRangeElementsBaseVertex" exec="dynamic"> + <function name="DrawRangeElementsBaseVertex" es2="3.2" exec="dynamic"> <param name="mode" type="GLenum"/> <param name="start" type="GLuint"/> <param name="end" type="GLuint"/> @@ -35,7 +35,7 @@ <param name="basevertex" type="const GLint *"/> </function> - <function name="DrawElementsInstancedBaseVertex" exec="dynamic"> + <function name="DrawElementsInstancedBaseVertex" 
es2="3.2" exec="dynamic"> <param name="mode" type="GLenum"/> <param name="count" type="GLsizei"/> <param name="type" type="GLenum"/> diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index cfca5a980bb..bf20e4801cc 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -817,4 +817,92 @@ </function> </category> +<category name="GL_EXT_draw_elements_base_vertex" number="204"> + + <function name="DrawElementsBaseVertexEXT" alias="DrawElementsBaseVertex" + es2="2.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="basevertex" type="GLint"/> + </function> + + <function name="DrawRangeElementsBaseVertexEXT" alias="DrawRangeElementsBaseVertex" + es2="3.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="start" type="GLuint"/> + <param name="end" type="GLuint"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="basevertex" type="GLint"/> + </function> + + <function name="MultiDrawElementsBaseVertexEXT" alias="MultiDrawElementsBaseVertex" + es2="2.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="const GLsizei *"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid * const *"/> + <param name="primcount" type="GLsizei"/> + <param name="basevertex" type="const GLint *"/> + </function> + + <function name="DrawElementsInstancedBaseVertexEXT" alias="DrawElementsInstancedBaseVertex" + es2="3.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="primcount" type="GLsizei"/> + <param name="basevertex" type="GLint"/> + </function> + +</category> + +<category 
name="GL_OES_draw_elements_base_vertex" number="219"> + + <function name="DrawElementsBaseVertexOES" alias="DrawElementsBaseVertex" + es2="2.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="basevertex" type="GLint"/> + </function> + + <function name="DrawRangeElementsBaseVertexOES" alias="DrawRangeElementsBaseVertex" + es2="3.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="start" type="GLuint"/> + <param name="end" type="GLuint"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="basevertex" type="GLint"/> + </function> + + <function name="MultiDrawElementsBaseVertexOES" alias="MultiDrawElementsBaseVertex" + es2="2.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="const GLsizei *"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid * const *"/> + <param name="primcount" type="GLsizei"/> + <param name="basevertex" type="const GLint *"/> + </function> + + <function name="DrawElementsInstancedBaseVertexOES" alias="DrawElementsInstancedBaseVertex" + es2="3.0" exec="dynamic"> + <param name="mode" type="GLenum"/> + <param name="count" type="GLsizei"/> + <param name="type" type="GLenum"/> + <param name="indices" type="const GLvoid *"/> + <param name="primcount" type="GLsizei"/> + <param name="basevertex" type="GLint"/> + </function> + +</category> + </OpenGLAPI> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 34fb4461985..de0e330b7d1 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -345,7 +345,6 @@ TNL_FILES = \ tnl/tnl.h \ tnl/t_pipeline.c \ tnl/t_pipeline.h \ - tnl/t_rasterpos.c \ tnl/t_vb_cliptmp.h \ tnl/t_vb_fog.c \ tnl/t_vb_light.c \ @@ -424,6 +423,8 @@ STATETRACKER_FILES = \ 
state_tracker/st_cb_clear.h \ state_tracker/st_cb_condrender.c \ state_tracker/st_cb_condrender.h \ + state_tracker/st_cb_copyimage.c \ + state_tracker/st_cb_copyimage.h \ state_tracker/st_cb_drawpixels.c \ state_tracker/st_cb_drawpixels.h \ state_tracker/st_cb_drawpixels_shader.c \ diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 3d1fccb3ab4..752aaf6c006 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -33,6 +33,7 @@ #include "main/mipmap.h" #include "main/queryobj.h" #include "main/readpix.h" +#include "main/rastpos.h" #include "main/renderbuffer.h" #include "main/shaderobj.h" #include "main/texcompress.h" @@ -81,7 +82,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) /* framebuffer/image functions */ driver->Clear = _swrast_Clear; - driver->RasterPos = _tnl_RasterPos; + driver->RasterPos = _mesa_RasterPos; driver->DrawPixels = _swrast_DrawPixels; driver->ReadPixels = _mesa_readpixels; driver->CopyPixels = _swrast_CopyPixels; diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 04b3f9cc8ce..9d003e48bd8 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -59,6 +59,7 @@ TESTS = \ test_fs_saturate_propagation \ test_eu_compact \ test_vf_float_conversions \ + test_vec4_cmod_propagation \ test_vec4_copy_propagation \ test_vec4_register_coalesce @@ -94,6 +95,12 @@ test_vec4_copy_propagation_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(TEST_LIBS) +test_vec4_cmod_propagation_SOURCES = \ + test_vec4_cmod_propagation.cpp +test_vec4_cmod_propagation_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(TEST_LIBS) + test_eu_compact_SOURCES = \ test_eu_compact.c nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ccd540dabca..ed2654ef329 100644 --- 
a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -58,6 +58,7 @@ i965_compiler_FILES = \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ + brw_vec4_cmod_propagation.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4.cpp \ brw_vec4_cse.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 10bcd4bafd4..5d46615bc7b 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -528,7 +528,9 @@ cfg_t::dump_domtree() { printf("digraph DominanceTree {\n"); foreach_block(block, this) { - printf("\t%d -> %d\n", block->idom->num, block->num); + if (block->idom) { + printf("\t%d -> %d\n", block->idom->num, block->num); + } } printf("}\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index a06b0aa1cd0..69e39e8964d 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -90,6 +90,8 @@ struct bblock_t { struct exec_list parents; struct exec_list children; int num; + + unsigned cycle_count; }; static inline struct backend_instruction * @@ -285,6 +287,8 @@ struct cfg_t { int num_blocks; bool idom_dirty; + + unsigned cycle_count; }; /* Note that this is implemented with a double for loop -- break will diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index d9967143d8a..e5133ef5a3d 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -338,6 +338,7 @@ struct brw_wm_prog_data { } binding_table; uint8_t computed_depth_mode; + bool computed_stencil; bool early_fragment_tests; bool no_8; @@ -443,9 +444,7 @@ struct brw_vue_map { * directly correspond to a gl_varying_slot, the value comes from * brw_varying_slot. * - * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this - * simplifies code that uses the value stored in slot_to_varying to - * create a bit mask). 
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD. */ signed char slot_to_varying[BRW_VARYING_SLOT_COUNT]; @@ -467,8 +466,8 @@ static inline GLuint brw_vue_slot_to_offset(GLuint slot) * Convert a vertex output (brw_varying_slot) into a byte offset within the * VUE. */ -static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, - GLuint varying) +static inline +GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying) { return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4f503ae4869..c83f47bdff7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -501,8 +501,6 @@ struct brw_cache_item { }; -typedef void (*cache_aux_free_func)(const void *aux); - struct brw_cache { struct brw_context *brw; @@ -512,9 +510,6 @@ struct brw_cache { uint32_t next_offset; bool bo_used_by_gpu; - - /** Optional functions for freeing other pointers attached to a prog_data. 
*/ - cache_aux_free_func aux_free[BRW_MAX_CACHE]; }; @@ -1177,7 +1172,7 @@ struct brw_context int num_atoms[BRW_NUM_PIPELINES]; const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[8]; + const struct brw_tracked_state compute_atoms[9]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { @@ -1463,7 +1458,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw, struct brw_stage_prog_data *prog_data, bool dword_pitch); void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data); void brw_upload_image_surfaces(struct brw_context *brw, @@ -1680,6 +1675,7 @@ struct opcode_desc { extern const struct opcode_desc opcode_descs[128]; extern const char * const conditional_modifier[16]; +extern const char *const pred_ctrl_align16[16]; void brw_emit_depthbuffer(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 169d092f90e..754da9fc3da 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -913,20 +913,15 @@ enum opcode { /** * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as - * individual sources instead of as a single payload blob: - * - * Source 0: [required] Color 0. - * Source 1: [optional] Color 1 (for dual source blend messages). - * Source 2: [optional] Src0 Alpha. - * Source 3: [optional] Source Depth (gl_FragDepth) - * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread - * Source 5: [optional] Sample Mask (gl_SampleMask). - * Source 6: [required] Number of color components (as a UD immediate). + * individual sources instead of as a single payload blob. The + * position/ordering of the arguments are defined by the enum + * fb_write_logical_srcs. 
*/ FS_OPCODE_FB_WRITE_LOGICAL, FS_OPCODE_BLORP_FB_WRITE, FS_OPCODE_REP_FB_WRITE, + FS_OPCODE_PACK_STENCIL_REF, SHADER_OPCODE_RCP, SHADER_OPCODE_RSQ, SHADER_OPCODE_SQRT, @@ -1332,6 +1327,17 @@ enum brw_urb_write_flags { BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE, }; +enum fb_write_logical_srcs { + FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */ + FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */ + FB_WRITE_LOGICAL_SRC_SRC0_ALPHA, + FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */ + FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */ + FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */ + FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */ + FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */ +}; + #ifdef __cplusplus /** * Allow brw_urb_write_flags enums to be ORed together. diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 65172490da3..6372fb5c55f 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -311,7 +311,7 @@ static const struct brw_device_info brw_device_info_chv = { .max_gs_threads = 336, \ .max_hs_threads = 336, \ .max_ds_threads = 336, \ - .max_wm_threads = 64 * 6, \ + .max_wm_threads = 64 * 9, \ .max_cs_threads = 56, \ .urb = { \ .size = 384, \ @@ -335,6 +335,10 @@ static const struct brw_device_info brw_device_info_skl_gt3 = { GEN9_FEATURES, .gt = 3, }; +static const struct brw_device_info brw_device_info_skl_gt4 = { + GEN9_FEATURES, .gt = 4, +}; + static const struct brw_device_info brw_device_info_bxt = { GEN9_FEATURES, .is_broxton = 1, @@ -359,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = { }; const struct brw_device_info * -brw_get_device_info(int devid, int revision) +brw_get_device_info(int devid) { const struct brw_device_info *devinfo; switch (devid) { diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h 
index 7bab5716b43..6f4a250e874 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -86,5 +86,5 @@ struct brw_device_info /** @} */ }; -const struct brw_device_info *brw_get_device_info(int devid, int revision); +const struct brw_device_info *brw_get_device_info(int devid); const char *brw_get_device_name(int devid); diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index db23a187a93..df747107188 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -252,7 +252,7 @@ static const char *const pred_inv[2] = { [1] = "-" }; -static const char *const pred_ctrl_align16[16] = { +const char *const pred_ctrl_align16[16] = { [1] = "", [2] = ".x", [3] = ".y", @@ -726,7 +726,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: string(file, "null"); - return -1; + break; case BRW_ARF_ADDRESS: format(file, "a%d", _reg_nr & 0x0f); break; @@ -908,7 +908,6 @@ src_ia1(FILE *file, unsigned _addr_subreg_nr, unsigned _negate, unsigned __abs, - unsigned _addr_mode, unsigned _horiz_stride, unsigned _width, unsigned _vert_stride) { int err = 0; @@ -1143,7 +1142,6 @@ src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_src0_ia_subreg_nr(devinfo, inst), brw_inst_src0_negate(devinfo, inst), brw_inst_src0_abs(devinfo, inst), - brw_inst_src0_address_mode(devinfo, inst), brw_inst_src0_hstride(devinfo, inst), brw_inst_src0_width(devinfo, inst), brw_inst_src0_vstride(devinfo, inst)); @@ -1200,7 +1198,6 @@ src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_src1_ia_subreg_nr(devinfo, inst), brw_inst_src1_negate(devinfo, inst), brw_inst_src1_abs(devinfo, inst), - brw_inst_src1_address_mode(devinfo, inst), brw_inst_src1_hstride(devinfo, inst), brw_inst_src1_width(devinfo, inst), brw_inst_src1_vstride(devinfo, inst)); diff --git 
a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 1f4a3516fa2..40ec87d38f0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -261,7 +261,7 @@ void brw_disassemble(const struct brw_device_info *devinfo, void *assembly, int start, int end, FILE *out) { - bool dump_hex = false; + bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0; for (int offset = start; offset < end;) { brw_inst *insn = assembly + offset; diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index f787ea3d4f8..07ace6bfbcb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1407,6 +1407,9 @@ void brw_compact_instructions(struct brw_codegen *p, int start_offset, int num_annotations, struct annotation *annotation) { + if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION)) + return; + const struct brw_device_info *devinfo = p->devinfo; void *store = p->store + start_offset / 16; /* For an instruction at byte offset 16*i before compaction, this is the diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index bf2fee9ed48..a6fbb542919 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -410,7 +410,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset); } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset); + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset); } } @@ -2511,12 +2511,20 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_reg desc) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *send, *setup; + struct brw_inst *send; + int setup; assert(desc.type == BRW_REGISTER_TYPE_UD); + /* We hold on to 
the setup instruction (the SEND in the direct case, the OR + * in the indirect case) by its index in the instruction store. The + * pointer returned by next_insn() may become invalid if emitting the SEND + * in the indirect case reallocs the store. + */ + if (desc.file == BRW_IMMEDIATE_VALUE) { - setup = send = next_insn(p, BRW_OPCODE_SEND); + setup = p->nr_insn; + send = next_insn(p, BRW_OPCODE_SEND); brw_set_src1(p, send, desc); } else { @@ -2531,7 +2539,8 @@ brw_send_indirect_message(struct brw_codegen *p, * caller can specify additional descriptor bits with the usual * brw_set_*_message() helper functions. */ - setup = brw_OR(p, addr, desc, brw_imm_ud(0)); + setup = p->nr_insn; + brw_OR(p, addr, desc, brw_imm_ud(0)); brw_pop_insn_state(p); @@ -2543,7 +2552,7 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); - return setup; + return &p->store[setup]; } static struct brw_inst * @@ -2906,11 +2915,10 @@ brw_untyped_surface_read(struct brw_codegen *p, const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); struct brw_inst *insn = brw_send_indirect_surface_message( p, sfid, dst, payload, surface, msg_length, brw_surface_payload_size(p, num_channels, true, true), - align1); + false); brw_set_dp_untyped_surface_read_message( p, insn, num_channels); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8320cd77299..e218a85a363 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -88,8 +88,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case IMM: case UNIFORM: unreachable("Invalid destination register file"); - default: - unreachable("Invalid register file"); } this->writes_accumulator = false; @@ -538,18 +536,6 @@ fs_visitor::get_timestamp(const fs_builder &bld) */ bld.group(4, 0).exec_all().MOV(dst, ts); - /* The caller wants the low 32 bits of the timestamp. Since it's running - * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, - * which is plenty of time for our purposes. It is identical across the - * EUs, but since it's tracking GPU core speed it will increment at a - * varying rate as render P-states change. - * - * The caller could also check if render P-states have changed (or anything - * else that might disrupt timing) by setting smear to 2 and checking if - * that field is != 0. - */ - dst.set_smear(0); - return dst; } @@ -557,6 +543,14 @@ void fs_visitor::emit_shader_time_begin() { shader_start_time = get_timestamp(bld.annotate("shader time start")); + + /* We want only the low 32 bits of the timestamp. Since it's running + * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, + * which is plenty of time for our purposes. It is identical across the + * EUs, but since it's tracking GPU core speed it will increment at a + * varying rate as render P-states change. 
+ */ + shader_start_time.set_smear(0); } void @@ -570,6 +564,15 @@ fs_visitor::emit_shader_time_end() fs_reg shader_end_time = get_timestamp(ibld); + /* We only use the low 32 bits of the timestamp - see + * emit_shader_time_begin()). + * + * We could also check if render P-states have changed (or anything + * else that might disrupt timing) by setting smear to 2 and checking if + * that field is != 0. + */ + shader_end_time.set_smear(0); + /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ @@ -700,10 +703,10 @@ fs_inst::components_read(unsigned i) const return 2; case FS_OPCODE_FB_WRITE_LOGICAL: - assert(src[6].file == IMM); + assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM); /* First/second FB write color. */ if (i < 2) - return src[6].fixed_hw_reg.dw1.ud; + return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; else return 1; @@ -841,9 +844,8 @@ fs_inst::regs_read(int arg) const REG_SIZE); case MRF: unreachable("MRF registers are not allowed as sources"); - default: - unreachable("Invalid register file"); } + return 0; } bool @@ -1283,9 +1285,9 @@ fs_visitor::emit_sampleid_setup() fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); if (key->compute_sample_id) { - fs_reg t1 = vgrf(glsl_type::int_type); - fs_reg t2 = vgrf(glsl_type::int_type); - t2.type = BRW_REGISTER_TYPE_UW; + fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D); + t1.set_smear(0); + fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W); /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with * 8x multisampling, subspan 0 will represent sample N (where N @@ -1306,13 +1308,13 @@ fs_visitor::emit_sampleid_setup() * are sample 1 of subspan 0; the third group is sample 0 of * subspan 1, and finally sample 1 of subspan 1. 
*/ - abld.exec_all() - .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + abld.exec_all().group(1, 0) + .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), fs_reg(0xc0)); - abld.exec_all().SHR(t1, t1, fs_reg(5)); + abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); /* This works for both SIMD8 and SIMD16 */ - abld.exec_all() + abld.exec_all().group(4, 0) .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)); /* This special instruction takes care of setting vstride=1, @@ -1443,6 +1445,9 @@ fs_visitor::calculate_urb_setup() } } } else { + bool include_vue_header = + nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + /* We have enough input varyings that the SF/SBE pipeline stage can't * arbitrarily rearrange them to suit our whim; we have to put them * in an order that matches the output of the previous pipeline stage @@ -1452,15 +1457,14 @@ fs_visitor::calculate_urb_setup() brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, nir->info.separate_shader); - int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + int first_slot = + include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + assert(prev_stage_vue_map.num_slots <= first_slot + 32); for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; slot++) { int varying = prev_stage_vue_map.slot_to_varying[slot]; - /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is - * unused. 
- */ - if (varying != BRW_VARYING_SLOT_COUNT && + if (varying != BRW_VARYING_SLOT_PAD && (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK & BITFIELD64_BIT(varying))) { prog_data->urb_setup[varying] = slot - first_slot; @@ -2615,7 +2619,7 @@ fs_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = fs_reg(0); + inst->src[0] = fs_reg(0u); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -2643,8 +2647,9 @@ fs_visitor::emit_repclear_shader() fs_inst *mov; if (uniforms == 1) { - mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + mov = bld.exec_all().group(4, 0) + .MOV(brw_message_reg(color_mrf), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); } else { struct brw_reg reg = brw_reg(BRW_GENERAL_REGISTER_FILE, @@ -2653,8 +2658,8 @@ fs_visitor::emit_repclear_shader() BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(reg)); + mov = bld.exec_all().group(4, 0) + .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)); } fs_inst *write; @@ -3366,15 +3371,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, const brw_wm_prog_key *key, const fs_visitor::thread_payload &payload) { - assert(inst->src[6].file == IMM); + assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM); const brw_device_info *devinfo = bld.shader->devinfo; - const fs_reg &color0 = inst->src[0]; - const fs_reg &color1 = inst->src[1]; - const fs_reg &src0_alpha = inst->src[2]; - const fs_reg &src_depth = inst->src[3]; - const fs_reg &dst_depth = inst->src[4]; - fs_reg sample_mask = inst->src[5]; - const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud; + const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0]; + const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1]; + const fs_reg &src0_alpha = 
inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA]; + const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH]; + const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH]; + const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL]; + fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK]; + const unsigned components = + inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. @@ -3464,6 +3471,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length++; } + if (src_stencil.file != BAD_FILE) { + assert(devinfo->gen >= 9); + assert(bld.dispatch_width() != 16); + + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.exec_all().annotate("FB write OS") + .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], + retype(src_stencil, BRW_REGISTER_TYPE_UB)); + length++; + } + fs_inst *load; if (devinfo->gen >= 7) { /* Send from the GRF */ @@ -4073,7 +4091,7 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_UNTYPED_SURFACE_READ, - fs_reg(0xffff)); + fs_reg()); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: @@ -4202,10 +4220,12 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them * here. */ - assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE || + assert(devinfo->gen != 6 || + inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE || inst->exec_size == 8); /* Dual-source FB writes are unsupported in SIMD16 mode. */ - return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size); + return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ? + 8 : inst->exec_size); case SHADER_OPCODE_TXD_LOGICAL: /* TXD is unsupported in SIMD16 mode. 
*/ @@ -4499,9 +4519,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->dst.fixed_hw_reg.subnr) fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr); break; - default: - fprintf(file, "???"); - break; + case IMM: + unreachable("not reached"); } fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); @@ -4594,9 +4613,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->src[i].fixed_hw_reg.abs) fprintf(file, "|"); break; - default: - fprintf(file, "???"); - break; } if (inst->src[i].abs) fprintf(file, "|"); @@ -4977,8 +4993,7 @@ fs_visitor::allocate_registers() if (failed) return; - if (!allocated_without_spills) - schedule_instructions(SCHEDULE_POST); + schedule_instructions(SCHEDULE_POST); if (last_scratch > 0) prog_data->total_scratch = brw_get_scratch_size(last_scratch); @@ -5236,6 +5251,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->uses_omask = shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); prog_data->computed_depth_mode = computed_depth_mode(shader); + prog_data->computed_stencil = + shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 50e98becf03..8058b344b7a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -145,6 +145,8 @@ public: void assign_vs_urb_setup(); bool assign_regs(bool allow_spilling); void assign_regs_trivial(); + void calculate_payload_ranges(int payload_node_count, + int *payload_last_use_ip); void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); int choose_spill_reg(struct ra_graph *g); @@ -337,6 +339,7 @@ public: int *push_constant_loc; fs_reg frag_depth; + fs_reg frag_stencil; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; unsigned 
output_components[VARYING_SLOT_MAX]; @@ -427,6 +430,8 @@ private: void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst, + struct brw_reg src); void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index df10a9de293..f121f3463d3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -390,14 +390,21 @@ namespace brw { src_reg emit_uniformize(const src_reg &src) const { + /* FIXME: We use a vector chan_index and dst to allow constant and + * copy propagration to move result all the way into the consuming + * instruction (typically a surface index or sampler index for a + * send). This uses 1 or 3 extra hw registers in 16 or 32 wide + * dispatch. Once we teach const/copy propagation about scalars we + * should go back to scalar destinations here. 
+ */ const fs_builder ubld = exec_all(); - const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0); - const dst_reg dst = component(vgrf(src.type), 0); + const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + const dst_reg dst = vgrf(src.type); ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); - ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index); + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); - return src_reg(dst); + return src_reg(component(dst, 0)); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 5589716239a..26204827156 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -416,9 +416,10 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].subreg_offset = offset % 32; } break; - default: - unreachable("Invalid register file"); - break; + + case MRF: + case IMM: + unreachable("not reached"); } if (has_source_modifiers) { @@ -612,6 +613,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } break; + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_WRITE: + /* We only propagate into the surface argument of the + * instruction. Everything else goes through LOAD_PAYLOAD. 
+ */ + if (i == 1) { + inst->src[i] = val; + progress = true; + } + break; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case SHADER_OPCODE_BROADCAST: inst->src[i] = val; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index c7628dcc2f4..3a28c8d591d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -93,7 +93,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_LOAD_PAYLOAD: return !inst->is_copy_payload(v->alloc); default: - return inst->is_send_from_grf() && !inst->has_side_effects(); + return inst->is_send_from_grf() && !inst->has_side_effects() && + !inst->is_volatile(); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index bb7e792044f..e207a77fdc1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -42,9 +42,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg) return BRW_MESSAGE_REGISTER_FILE; case IMM: return BRW_IMMEDIATE_VALUE; - default: + case BAD_FILE: + case HW_REG: + case ATTR: + case UNIFORM: unreachable("not reached"); } + return 0; } static struct brw_reg @@ -116,7 +120,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) /* Probably unused. 
*/ brw_reg = brw_null_reg(); break; - default: + case ATTR: + case UNIFORM: unreachable("not reached"); } if (reg->abs) @@ -317,6 +322,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) brw_imm_ud(inst->target)); } + /* Set computes stencil to render target */ + if (prog_data->computed_stencil) { + brw_OR(p, + vec1(retype(payload, BRW_REGISTER_TYPE_UD)), + vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(0x1 << 14)); + } + implied_header = brw_null_reg(); } else { implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -437,6 +450,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_stencil_ref_packing(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(dispatch_width == 8); + assert(devinfo->gen >= 9); + + /* Stencil value updates are provided in 8 slots of 1 byte per slot. + * Presumably, in order to save memory bandwidth, the stencil reference + * values written from the FS need to be packed into 2 dwords (this makes + * sense because the stencil values are limited to 1 byte each and a SIMD8 + * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.) + * + * The spec is confusing here because in the payload definition of MDP_RTW_S8 + * (Message Data Payload for Render Target Writes with Stencil 8b) the + * stencil value seems to be dw4.0-dw4.7. 
However, if you look at the type of + * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the + * packed values specified above and diagrammed below: + * + * 31 0 + * -------------------------------- + * DW | | + * 2-7 | IGNORED | + * | | + * -------------------------------- + * DW1 | STC | STC | STC | STC | + * | slot7 | slot6 | slot5 | slot4| + * -------------------------------- + * DW0 | STC | STC | STC | STC | + * | slot3 | slot2 | slot1 | slot0| + * -------------------------------- + */ + + src.vstride = BRW_VERTICAL_STRIDE_4; + src.width = BRW_WIDTH_1; + src.hstride = BRW_HORIZONTAL_STRIDE_0; + assert(src.type == BRW_REGISTER_TYPE_UB); + brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src); +} + +void fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) { brw_barrier(p, src); @@ -1455,18 +1509,18 @@ fs_generator::generate_set_sample_id(fs_inst *inst, assert(src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD); - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW); - if (dispatch_width == 8) { + struct brw_reg reg = stride(src1, 1, 4, 0); + if (devinfo->gen >= 8 || dispatch_width == 8) { brw_ADD(p, dst, src0, reg); } else if (dispatch_width == 16) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_ADD(p, firsthalf(dst), firsthalf(src0), reg); + brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2)); + brw_pop_insn_state(p); } - brw_pop_insn_state(p); } void @@ -2182,6 +2236,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_barrier(inst, src[0]); break; + case FS_OPCODE_PACK_STENCIL_REF: + 
generate_stencil_ref_packing(inst, dst, src[0]); + break; + default: unreachable("Unsupported opcode"); @@ -2216,9 +2274,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) if (unlikely(debug_flag)) { fprintf(stderr, "Native code for %s\n" - "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d" + "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d" " bytes (%.0f%%)\n", - shader_name, dispatch_width, before_size / 16, loop_count, + shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count, spill_count, fill_count, promoted_constants, before_size, after_size, 100.0f * (before_size - after_size) / before_size); @@ -2228,12 +2286,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } compiler->shader_debug_log(log_data, - "%s SIMD%d shader: %d inst, %d loops, " + "%s SIMD%d shader: %d inst, %d loops, %u cycles, " "%d:%d spills:fills, Promoted %u constants, " "compacted %d to %d bytes.\n", stage_abbrev, dispatch_width, before_size / 16, - loop_count, spill_count, fill_count, - promoted_constants, before_size, after_size); + loop_count, cfg->cycle_count, spill_count, + fill_count, promoted_constants, before_size, + after_size); return start_offset; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7b5a0482519..486741bea31 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -71,6 +71,14 @@ fs_visitor::nir_setup_inputs() var->data.origin_upper_left); emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), input, reg), 0xF); + } else if (var->data.location == VARYING_SLOT_LAYER) { + struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3); + reg.type = BRW_REGISTER_TYPE_D; + bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); + } else if (var->data.location == 
VARYING_SLOT_VIEWPORT) { + struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3); + reg.type = BRW_REGISTER_TYPE_D; + bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); } else { emit_general_interpolation(input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, @@ -114,6 +122,8 @@ fs_visitor::nir_setup_outputs() } } else if (var->data.location == FRAG_RESULT_DEPTH) { this->frag_depth = reg; + } else if (var->data.location == FRAG_RESULT_STENCIL) { + this->frag_stencil = reg; } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { this->sample_mask = reg; } else { @@ -896,12 +906,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ - bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); - fs_reg neg_result(result); - neg_result.negate = true; - inst = bld.ADD(result, neg_result, fs_reg(31)); + + inst = bld.ADD(result, result, fs_reg(31)); inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; } @@ -1322,6 +1331,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_shader_clock: { + /* We cannot do anything if there is an event, so ignore it for now */ + fs_reg shader_clock = get_timestamp(bld); + const fs_reg srcs[] = { shader_clock.set_smear(0), shader_clock.set_smear(1) }; + + bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); + break; + } + case nir_intrinsic_image_size: { /* Get the referenced image variable and type. 
*/ const nir_variable *var = instr->variables[0]->var; @@ -1509,7 +1527,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), fs_reg(stage_prog_data->binding_table.ssbo_start)); - surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1520,34 +1537,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } /* Get the offset to read from */ - fs_reg offset_reg = vgrf(glsl_type::uint_type); - unsigned const_offset_bytes = 0; + fs_reg offset_reg; if (has_indirect) { - bld.MOV(offset_reg, get_nir_src(instr->src[1])); + offset_reg = get_nir_src(instr->src[1]); } else { - const_offset_bytes = instr->const_index[0]; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); + offset_reg = fs_reg(instr->const_index[0]); } /* Read the vector */ - for (int i = 0; i < instr->num_components; i++) { - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 1 /* size */, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - bld.MOV(dest, read_result); - dest = offset(dest, bld, 1); - - /* Vector components are stored contiguous in memory */ - if (i < instr->num_components) { - if (!has_indirect) { - const_offset_bytes += 4; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } else { - bld.ADD(offset_reg, offset_reg, brw_imm_ud(4)); - } - } - } + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); break; } @@ -1765,52 +1769,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); 
bld.ADD(surf_index, get_nir_src(instr->src[1]), fs_reg(stage_prog_data->binding_table.ssbo_start)); - surf_index = bld.emit_uniformize(surf_index); brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } - /* Offset */ - fs_reg offset_reg = vgrf(glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - bld.MOV(offset_reg, get_nir_src(instr->src[2])); - } else { - const_offset_bytes = instr->const_index[0]; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } - /* Value */ fs_reg val_reg = get_nir_src(instr->src[0]); /* Writemask */ unsigned writemask = instr->const_index[1]; - /* Write each component present in the writemask */ - unsigned skipped_channels = 0; - for (int i = 0; i < instr->num_components; i++) { - int component_mask = 1 << i; - if (writemask & component_mask) { - if (skipped_channels) { - if (!has_indirect) { - const_offset_bytes += 4 * skipped_channels; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } else { - bld.ADD(offset_reg, offset_reg, - brw_imm_ud(4 * skipped_channels)); - } - skipped_channels = 0; - } + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. 
+ */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, i), - 1 /* dims */, 1 /* size */, - BRW_PREDICATE_NONE); + if (!has_indirect) { + offset_reg = fs_reg(instr->const_index[0] + 4 * first_component); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), + fs_reg(4 * first_component)); } - skipped_channels++; + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + writemask &= (15 << (first_component + length)); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 36388fad98d..9251d9552a5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -330,32 +330,12 @@ count_to_loop_end(const bblock_t *block) unreachable("not reached"); } -/** - * Sets up interference between thread payload registers and the virtual GRFs - * to be allocated for program temporaries. - * - * We want to be able to reallocate the payload for our virtual GRFs, notably - * because the setup coefficients for a full set of 16 FS inputs takes up 8 of - * our 128 registers. - * - * The layout of the payload registers is: - * - * 0..payload.num_regs-1: fixed function setup (including bary coordinates). - * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data - * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients. - * - * And we have payload_node_count nodes covering these registers in order - * (note that in SIMD16, a node is two registers). 
- */ -void -fs_visitor::setup_payload_interference(struct ra_graph *g, - int payload_node_count, - int first_payload_node) +void fs_visitor::calculate_payload_ranges(int payload_node_count, + int *payload_last_use_ip) { int loop_depth = 0; int loop_end_ip = 0; - int payload_last_use_ip[payload_node_count]; for (int i = 0; i < payload_node_count; i++) payload_last_use_ip[i] = -1; @@ -426,6 +406,33 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, ip++; } +} + + +/** + * Sets up interference between thread payload registers and the virtual GRFs + * to be allocated for program temporaries. + * + * We want to be able to reallocate the payload for our virtual GRFs, notably + * because the setup coefficients for a full set of 16 FS inputs takes up 8 of + * our 128 registers. + * + * The layout of the payload registers is: + * + * 0..payload.num_regs-1: fixed function setup (including bary coordinates). + * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data + * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients. + * + * And we have payload_node_count nodes covering these registers in order + * (note that in SIMD16, a node is two registers). + */ +void +fs_visitor::setup_payload_interference(struct ra_graph *g, + int payload_node_count, + int first_payload_node) +{ + int payload_last_use_ip[payload_node_count]; + calculate_payload_ranges(payload_node_count, payload_last_use_ip); for (int i = 0; i < payload_node_count; i++) { if (payload_last_use_ip[i] == -1) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 7cc4f3c927a..5c57944ca39 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg dst_depth = (payload.dest_depth_reg ? 
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) : fs_reg()); - fs_reg src_depth; + fs_reg src_depth, src_stencil; if (source_depth_to_render_target) { if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) @@ -706,10 +706,14 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) + src_stencil = frag_stencil; + const fs_reg sources[] = { - color0, color1, src0_alpha, src_depth, dst_depth, sample_mask, - fs_reg(components) + color0, color1, src0_alpha, src_depth, dst_depth, src_stencil, + sample_mask, fs_reg(components) }; + assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS); fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), sources, ARRAY_SIZE(sources)); @@ -740,6 +744,16 @@ fs_visitor::emit_fb_writes() no16("Missing support for simd16 depth writes on gen6\n"); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + /* From the 'Render Target Write message' section of the docs: + * "Output Stencil is not supported with SIMD16 Render Target Write + * Messages." 
+ * + * FINISHME: split 16 into 2 8s + */ + no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n"); + } + if (do_dual_src) { const fs_builder abld = bld.annotate("FB dual-source write"); diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index 00125c0f405..76ed237d88a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_GS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->gs.base, - &brw->gs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], + &brw->gs.base, &brw->gs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7726e4b78a0..4417555f18e 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -97,7 +97,9 @@ byte_offset(fs_reg reg, unsigned delta) case MRF: reg.reg += delta / 32; break; - default: + case IMM: + case HW_REG: + case UNIFORM: assert(delta == 0); } reg.subreg_offset += delta % 32; @@ -119,7 +121,7 @@ horiz_offset(fs_reg reg, unsigned delta) case MRF: case ATTR: return byte_offset(reg, delta * reg.stride * type_sz(reg.type)); - default: + case HW_REG: assert(delta == 0); } return reg; @@ -163,7 +165,6 @@ half(fs_reg reg, unsigned idx) case ATTR: case HW_REG: - default: unreachable("Cannot take half of this register type"); } return reg; diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 1b57b65db27..29642c6d2a4 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -161,9 +161,6 @@ public: const src_reg &src1 = src_reg(), const src_reg &src2 = src_reg()); - struct brw_reg get_dst(unsigned gen); - struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, 
int i); - dst_reg dst; src_reg src[3]; @@ -186,6 +183,27 @@ public: return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; } + bool reads_flag(unsigned c) + { + if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2) + return true; + + switch (predicate) { + case BRW_PREDICATE_NONE: + return false; + case BRW_PREDICATE_ALIGN16_REPLICATE_X: + return c == 0; + case BRW_PREDICATE_ALIGN16_REPLICATE_Y: + return c == 1; + case BRW_PREDICATE_ALIGN16_REPLICATE_Z: + return c == 2; + case BRW_PREDICATE_ALIGN16_REPLICATE_W: + return c == 3; + default: + return true; + } + } + bool writes_flag() { return (conditional_mod && (opcode != BRW_OPCODE_SEL && diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 9a33188cb5c..8c1a34ee17a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -205,6 +205,9 @@ brw_create_nir(struct brw_context *brw, if (shader_prog) { nir_lower_samplers(nir, shader_prog); nir_validate_shader(nir); + + nir_lower_atomics(nir, shader_prog); + nir_validate_shader(nir); } brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); @@ -278,9 +281,6 @@ brw_postprocess_nir(nir_shader *nir, nir_lower_system_values(nir); nir_validate_shader(nir); - nir_lower_atomics(nir); - nir_validate_shader(nir); - nir_optimize(nir, is_scalar); if (devinfo->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 87e7e011541..083c46a3726 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -205,7 +205,7 @@ enum PACKED brw_reg_type { /** @} */ /** Immediates only: @{ */ - BRW_REGISTER_TYPE_UV, + BRW_REGISTER_TYPE_UV, /* Gen6+ */ BRW_REGISTER_TYPE_V, BRW_REGISTER_TYPE_VF, /** @} */ diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index b710c60148c..88c45f74333 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -26,6 +26,7 @@ */ #include "brw_fs.h" +#include "brw_fs_live_variables.h" #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_shader.h" @@ -400,22 +401,49 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: instruction_scheduler(backend_shader *s, int grf_count, + int hw_reg_count, int block_count, instruction_scheduler_mode mode) { this->bs = s; this->mem_ctx = ralloc_context(NULL); this->grf_count = grf_count; + this->hw_reg_count = hw_reg_count; this->instructions.make_empty(); this->instructions_to_schedule = 0; this->post_reg_alloc = (mode == SCHEDULE_POST); this->mode = mode; this->time = 0; if (!post_reg_alloc) { - this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count); - this->grf_active = rzalloc_array(mem_ctx, bool, grf_count); + this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count); + + this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->livein[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(grf_count)); + + this->liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(grf_count)); + + this->hw_liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(hw_reg_count)); + + this->written = rzalloc_array(mem_ctx, bool, grf_count); + + this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count); + + this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count); } else { - this->remaining_grf_uses = NULL; - this->grf_active = NULL; + this->reg_pressure_in = NULL; + this->livein = NULL; + this->liveout = NULL; + this->hw_liveout = NULL; + this->written = NULL; + this->reads_remaining = NULL; + this->hw_reads_remaining = NULL; } 
} @@ -442,7 +470,8 @@ public: */ virtual int issue_time(backend_instruction *inst) = 0; - virtual void count_remaining_grf_uses(backend_instruction *inst) = 0; + virtual void count_reads_remaining(backend_instruction *inst) = 0; + virtual void setup_liveness(cfg_t *cfg) = 0; virtual void update_register_pressure(backend_instruction *inst) = 0; virtual int get_register_pressure_benefit(backend_instruction *inst) = 0; @@ -453,33 +482,63 @@ public: bool post_reg_alloc; int instructions_to_schedule; int grf_count; + int hw_reg_count; int time; + int reg_pressure; + int block_idx; exec_list instructions; backend_shader *bs; instruction_scheduler_mode mode; - /** - * Number of instructions left to schedule that reference each vgrf. - * - * Used so that we can prefer scheduling instructions that will end the - * live intervals of multiple variables, to reduce register pressure. + /* + * The register pressure at the beginning of each basic block. */ - int *remaining_grf_uses; - /** - * Tracks whether each VGRF has had an instruction scheduled that uses it. - * - * This is used to estimate whether scheduling a new instruction will - * increase register pressure. + int *reg_pressure_in; + + /* + * The virtual GRF's whose range overlaps the beginning of each basic block. + */ + + BITSET_WORD **livein; + + /* + * The virtual GRF's whose range overlaps the end of each basic block. + */ + + BITSET_WORD **liveout; + + /* + * The hardware GRF's whose range overlaps the end of each basic block. + */ + + BITSET_WORD **hw_liveout; + + /* + * Whether we've scheduled a write for this virtual GRF yet. + */ + + bool *written; + + /* + * How many reads we haven't scheduled for this virtual GRF yet. + */ + + int *reads_remaining; + + /* + * How many reads we haven't scheduled for this hardware GRF yet. 
*/ - bool *grf_active; + + int *hw_reads_remaining; }; class fs_instruction_scheduler : public instruction_scheduler { public: - fs_instruction_scheduler(fs_visitor *v, int grf_count, + fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count, + int block_count, instruction_scheduler_mode mode); void calculate_deps(); bool is_compressed(fs_inst *inst); @@ -487,35 +546,109 @@ public: int issue_time(backend_instruction *inst); fs_visitor *v; - void count_remaining_grf_uses(backend_instruction *inst); + void count_reads_remaining(backend_instruction *inst); + void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); }; fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, - int grf_count, + int grf_count, int hw_reg_count, + int block_count, instruction_scheduler_mode mode) - : instruction_scheduler(v, grf_count, mode), + : instruction_scheduler(v, grf_count, hw_reg_count, block_count, mode), v(v) { } +static bool +is_src_duplicate(fs_inst *inst, int src) +{ + for (int i = 0; i < src; i++) + if (inst->src[i].equals(inst->src[src])) + return true; + + return false; +} + void -fs_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be) +fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if (!remaining_grf_uses) + if (!reads_remaining) return; - if (inst->dst.file == GRF) - remaining_grf_uses[inst->dst.reg]++; - for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != GRF) + if (is_src_duplicate(inst, i)) continue; - remaining_grf_uses[inst->src[i].reg]++; + if (inst->src[i].file == GRF) { + reads_remaining[inst->src[i].reg]++; + } else if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count) + continue; + + for (int j = 0; j < inst->regs_read(i); j++) + 
hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++; + } + } +} + +void +fs_instruction_scheduler::setup_liveness(cfg_t *cfg) +{ + /* First, compute liveness on a per-GRF level using the in/out sets from + * liveness calculation. + */ + for (int block = 0; block < cfg->num_blocks; block++) { + for (int i = 0; i < v->live_intervals->num_vars; i++) { + if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) { + int vgrf = v->live_intervals->vgrf_from_var[i]; + if (!BITSET_TEST(livein[block], vgrf)) { + reg_pressure_in[block] += v->alloc.sizes[vgrf]; + BITSET_SET(livein[block], vgrf); + } + } + + if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) + BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]); + } + } + + /* Now, extend the live in/live out sets for when a range crosses a block + * boundary, which matches what our register allocator/interference code + * does to account for force_writemask_all and incompatible exec_mask's. + */ + for (int block = 0; block < cfg->num_blocks - 1; block++) { + for (int i = 0; i < grf_count; i++) { + if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip && + v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) { + if (!BITSET_TEST(livein[block + 1], i)) { + reg_pressure_in[block + 1] += v->alloc.sizes[i]; + BITSET_SET(livein[block + 1], i); + } + + BITSET_SET(liveout[block], i); + } + } + } + + int payload_last_use_ip[hw_reg_count]; + v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip); + + for (int i = 0; i < hw_reg_count; i++) { + if (payload_last_use_ip[i] == -1) + continue; + + for (int block = 0; block < cfg->num_blocks; block++) { + if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i]) + reg_pressure_in[block]++; + + if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) + BITSET_SET(hw_liveout[block], i); + } } } @@ -524,18 +657,24 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if 
(!remaining_grf_uses) + if (!reads_remaining) return; if (inst->dst.file == GRF) { - remaining_grf_uses[inst->dst.reg]--; - grf_active[inst->dst.reg] = true; + written[inst->dst.reg] = true; } for (int i = 0; i < inst->sources; i++) { + if (is_src_duplicate(inst, i)) + continue; + if (inst->src[i].file == GRF) { - remaining_grf_uses[inst->src[i].reg]--; - grf_active[inst->src[i].reg] = true; + reads_remaining[inst->src[i].reg]--; + } else if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE && + inst->src[i].fixed_hw_reg.nr < hw_reg_count) { + for (int off = 0; off < inst->regs_read(i); off++) + hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--; } } } @@ -547,20 +686,31 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) int benefit = 0; if (inst->dst.file == GRF) { - if (remaining_grf_uses[inst->dst.reg] == 1) - benefit += v->alloc.sizes[inst->dst.reg]; - if (!grf_active[inst->dst.reg]) + if (!BITSET_TEST(livein[block_idx], inst->dst.reg) && + !written[inst->dst.reg]) benefit -= v->alloc.sizes[inst->dst.reg]; } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != GRF) + if (is_src_duplicate(inst, i)) continue; - if (remaining_grf_uses[inst->src[i].reg] == 1) + if (inst->src[i].file == GRF && + !BITSET_TEST(liveout[block_idx], inst->src[i].reg) && + reads_remaining[inst->src[i].reg] == 1) benefit += v->alloc.sizes[inst->src[i].reg]; - if (!grf_active[inst->src[i].reg]) - benefit -= v->alloc.sizes[inst->src[i].reg]; + + if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE && + inst->src[i].fixed_hw_reg.nr < hw_reg_count) { + for (int off = 0; off < inst->regs_read(i); off++) { + int reg = inst->src[i].fixed_hw_reg.nr + off; + if (!BITSET_TEST(hw_liveout[block_idx], reg) && + hw_reads_remaining[reg] == 1) { + benefit++; + } + } + } } return benefit; @@ -575,20 +725,26 @@ public: int issue_time(backend_instruction *inst); 
vec4_visitor *v; - void count_remaining_grf_uses(backend_instruction *inst); + void count_reads_remaining(backend_instruction *inst); + void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); }; vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, int grf_count) - : instruction_scheduler(v, grf_count, SCHEDULE_POST), + : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST), v(v) { } void -vec4_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be) +vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be) +{ +} + +void +vec4_instruction_scheduler::setup_liveness(cfg_t *cfg) { } @@ -822,7 +978,7 @@ fs_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF); add_barrier_deps(n); } @@ -927,10 +1083,10 @@ fs_instruction_scheduler::calculate_deps() if (inst->src[i].file == GRF) { if (post_reg_alloc) { for (int r = 0; r < inst->regs_read(i); r++) - add_dep(n, last_grf_write[inst->src[i].reg + r]); + add_dep(n, last_grf_write[inst->src[i].reg + r], 0); } else { for (int r = 0; r < inst->regs_read(i); r++) { - add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]); + add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0); } } } else if (inst->src[i].file == HW_REG && @@ -941,17 +1097,17 @@ fs_instruction_scheduler::calculate_deps() if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0) size = 1; for (int r = 0; r < size; r++) - add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]); + add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0); } else { - add_dep(n, last_fixed_grf_write); + add_dep(n, last_fixed_grf_write, 0); } } else if 
(inst->src[i].is_accumulator()) { - add_dep(n, last_accumulator_write); + add_dep(n, last_accumulator_write, 0); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF); add_barrier_deps(n); } @@ -1080,7 +1236,7 @@ vec4_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { /* No reads from MRF, and ATTR is already translated away */ assert(inst->src[i].file != MRF && inst->src[i].file != ATTR); @@ -1177,7 +1333,7 @@ vec4_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF && inst->src[i].file != ATTR); add_barrier_deps(n); @@ -1387,6 +1543,9 @@ instruction_scheduler::schedule_instructions(bblock_t *block) const struct brw_device_info *devinfo = bs->devinfo; backend_instruction *inst = block->end(); time = 0; + if (!post_reg_alloc) + reg_pressure = reg_pressure_in[block->num]; + block_idx = block->num; /* Remove non-DAG heads from the list. */ foreach_in_list_safe(schedule_node, n, &instructions) { @@ -1403,23 +1562,30 @@ instruction_scheduler::schedule_instructions(bblock_t *block) chosen->remove(); inst->insert_before(block, chosen->inst); instructions_to_schedule--; - update_register_pressure(chosen->inst); - /* Update the clock for how soon an instruction could start after the - * chosen one. 
- */ - time += issue_time(chosen->inst); + if (!post_reg_alloc) { + reg_pressure -= get_register_pressure_benefit(chosen->inst); + update_register_pressure(chosen->inst); + } /* If we expected a delay for scheduling, then bump the clock to reflect - * that as well. In reality, the hardware will switch to another - * hyperthread and may not return to dispatching our thread for a while - * even after we're unblocked. + * that. In reality, the hardware will switch to another hyperthread + * and may not return to dispatching our thread for a while even after + * we're unblocked. After this, we have the time when the chosen + * instruction will start executing. */ time = MAX2(time, chosen->unblocked_time); + /* Update the clock for how soon an instruction could start after the + * chosen one. + */ + time += issue_time(chosen->inst); + if (debug) { fprintf(stderr, "clock %4d, scheduled: ", time); bs->dump_instruction(chosen->inst); + if (!post_reg_alloc) + fprintf(stderr, "(register pressure %d)\n", reg_pressure); } /* Now that we've scheduled a new instruction, some of its @@ -1466,30 +1632,53 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (block->end()->opcode == BRW_OPCODE_NOP) block->end()->remove(block); assert(instructions_to_schedule == 0); + + block->cycle_count = time; +} + +static unsigned get_cycle_count(cfg_t *cfg) +{ + unsigned count = 0, multiplier = 1; + foreach_block(block, cfg) { + if (block->start()->opcode == BRW_OPCODE_DO) + multiplier *= 10; /* assume that loops execute ~10 times */ + + count += block->cycle_count * multiplier; + + if (block->end()->opcode == BRW_OPCODE_WHILE) + multiplier /= 10; + } + + return count; } void instruction_scheduler::run(cfg_t *cfg) { - if (debug) { + if (debug && !post_reg_alloc) { fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc); - bs->dump_instructions(); + bs->dump_instructions(); } - /* Populate the remaining GRF uses array to improve the pre-regalloc - * 
scheduling. - */ - if (remaining_grf_uses) { - foreach_block_and_inst(block, backend_instruction, inst, cfg) { - count_remaining_grf_uses(inst); - } - } + if (!post_reg_alloc) + setup_liveness(cfg); foreach_block(block, cfg) { if (block->end_ip - block->start_ip <= 1) continue; + if (reads_remaining) { + memset(reads_remaining, 0, + grf_count * sizeof(*reads_remaining)); + memset(hw_reads_remaining, 0, + hw_reg_count * sizeof(*hw_reads_remaining)); + memset(written, 0, grf_count * sizeof(*written)); + + foreach_inst_in_block(fs_inst, inst, block) + count_reads_remaining(inst); + } + add_insts_from_block(block); calculate_deps(); @@ -1501,23 +1690,29 @@ instruction_scheduler::run(cfg_t *cfg) schedule_instructions(block); } - if (debug) { + if (debug && !post_reg_alloc) { fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n", post_reg_alloc); bs->dump_instructions(); } + + cfg->cycle_count = get_cycle_count(cfg); } void fs_visitor::schedule_instructions(instruction_scheduler_mode mode) { + if (mode != SCHEDULE_POST) + calculate_live_intervals(); + int grf_count; if (mode == SCHEDULE_POST) grf_count = grf_used; else grf_count = alloc.count; - fs_instruction_scheduler sched(this, grf_count, mode); + fs_instruction_scheduler sched(this, grf_count, first_non_payload_grf, + cfg->num_blocks, mode); sched.run(cfg); if (unlikely(debug_enabled) && mode == SCHEDULE_POST) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index e48f559afa7..063cb84a958 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -298,6 +298,8 @@ brw_instruction_name(enum opcode op) return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; + case FS_OPCODE_PACK_STENCIL_REF: + return "pack_stencil_ref"; case FS_OPCODE_BLORP_FB_WRITE: return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: @@ -988,6 +990,20 @@ backend_instruction::has_side_effects() const } } +bool 
+backend_instruction::is_volatile() const +{ + switch (opcode) { + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + return true; + default: + return false; + } +} + #ifndef NDEBUG static bool inst_is_in_block(const bblock_t *block, const backend_instruction *inst) @@ -1178,9 +1194,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; } - if (shader_prog && shader_prog->NumAtomicBuffers) { + if (shader && shader->NumAtomicBuffers) { stage_prog_data->binding_table.abo_start = next_binding_table_offset; - next_binding_table_offset += shader_prog->NumAtomicBuffers; + next_binding_table_offset += shader->NumAtomicBuffers; } else { stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 8899b30c1ae..f4647cca4f9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -115,6 +115,12 @@ struct backend_instruction : public exec_node { * optimize these out unless you know what you are doing. */ bool has_side_effects() const; + + /** + * True if the instruction might be affected by side effects of other + * instructions. 
+ */ + bool is_volatile() const; #else struct backend_instruction { struct exec_node link; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index dc2b9415673..2aa1248fea6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -49,6 +49,7 @@ extern const struct brw_tracked_state brw_clip_unit; extern const struct brw_tracked_state brw_vs_pull_constants; extern const struct brw_tracked_state brw_gs_pull_constants; extern const struct brw_tracked_state brw_wm_pull_constants; +extern const struct brw_tracked_state brw_cs_pull_constants; extern const struct brw_tracked_state brw_constant_buffer; extern const struct brw_tracked_state brw_curbe_offsets; extern const struct brw_tracked_state brw_invariant_state; @@ -220,7 +221,7 @@ bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - uint32_t *inout_offset, void *out_aux); + uint32_t *inout_offset, void *inout_aux); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); @@ -345,7 +346,8 @@ calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, uint32_t *flat_enables, - uint32_t *urb_entry_read_length); + uint32_t *urb_entry_read_length, + uint32_t *urb_entry_read_offset); /* gen6_surface_state.c */ void gen6_init_vtable_surface_functions(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 2fbcd146750..f7c0a2037d9 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -137,7 +137,7 @@ bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - uint32_t *inout_offset, void *out_aux) + uint32_t *inout_offset, void *inout_aux) { struct brw_context *brw = cache->brw; struct brw_cache_item 
*item; @@ -155,11 +155,12 @@ brw_search_cache(struct brw_cache *cache, if (item == NULL) return false; - *(void **)out_aux = ((char *)item->key + item->key_size); + void *aux = ((char *) item->key) + item->key_size; - if (item->offset != *inout_offset) { + if (item->offset != *inout_offset || aux != *((void **) inout_aux)) { brw->ctx.NewDriverState |= (1 << cache_id); *inout_offset = item->offset; + *((void **) inout_aux) = aux; } return true; @@ -349,11 +350,6 @@ brw_init_caches(struct brw_context *brw) 4096, 64); if (brw->has_llc) drm_intel_gem_bo_map_unsynchronized(cache->bo); - - cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free; } static void @@ -367,9 +363,12 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; - if (cache->aux_free[c->cache_id]) { + if (c->cache_id == BRW_CACHE_VS_PROG || + c->cache_id == BRW_CACHE_GS_PROG || + c->cache_id == BRW_CACHE_FS_PROG || + c->cache_id == BRW_CACHE_CS_PROG) { const void *item_aux = c->key + c->key_size; - cache->aux_free[c->cache_id](item_aux); + brw_stage_prog_data_free(item_aux); } free((void *)c->key); free(c); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 79b8301954e..0344b8a7fb0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &brw_state_base_address, &brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, @@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &gen8_state_base_address, 
&brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 3e7078d0b32..01eb1580953 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1370,9 +1370,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) vec4_instruction *inst = (vec4_instruction *)be_inst; if (inst->predicate) { - fprintf(file, "(%cf0.%d) ", + fprintf(file, "(%cf0.%d%s) ", inst->predicate_inverse ? '-' : '+', - inst->flag_subreg); + inst->flag_subreg, + pred_ctrl_align16[inst->predicate]); } fprintf(file, "%s", brw_instruction_name(inst->opcode)); @@ -1426,9 +1427,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); } if (inst->dst.writemask != WRITEMASK_XYZW) { fprintf(file, "."); @@ -1520,9 +1522,8 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case MRF: + unreachable("not reached"); } /* Don't print .0; and only VGRFs have reg_offsets and sizes */ @@ -1787,13 +1788,100 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; - emit(MOV(time, src_reg(value))); + emit(MOV(time, value)); vec4_instruction *inst = emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst)); inst->mlen = 2; } +void +vec4_visitor::convert_to_hw_regs() +{ + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + struct src_reg &src = inst->src[i]; + struct brw_reg reg; + switch (src.file) { + case GRF: + reg = 
brw_vec8_grf(src.reg + src.reg_offset, 0); + reg.type = src.type; + reg.dw1.bits.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + break; + + case IMM: + reg = brw_imm_reg(src.type); + reg.dw1.ud = src.fixed_hw_reg.dw1.ud; + break; + + case UNIFORM: + reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + + (src.reg + src.reg_offset) / 2, + ((src.reg + src.reg_offset) % 2) * 4), + 0, 4, 1); + reg.type = src.type; + reg.dw1.bits.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + + /* This should have been moved to pull constants. */ + assert(!src.reladdr); + break; + + case HW_REG: + assert(src.type == src.fixed_hw_reg.type); + continue; + + case BAD_FILE: + /* Probably unused. */ + reg = brw_null_reg(); + break; + + case MRF: + case ATTR: + unreachable("not reached"); + } + src.fixed_hw_reg = reg; + } + + dst_reg &dst = inst->dst; + struct brw_reg reg; + + switch (inst->dst.file) { + case GRF: + reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + reg.type = dst.type; + reg.dw1.bits.writemask = dst.writemask; + break; + + case MRF: + assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen)); + reg = brw_message_reg(dst.reg + dst.reg_offset); + reg.type = dst.type; + reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + assert(dst.type == dst.fixed_hw_reg.type); + reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + reg = brw_null_reg(); + break; + + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); + } + + dst.fixed_hw_reg = reg; + } +} + bool vec4_visitor::run() { @@ -1862,6 +1950,7 @@ vec4_visitor::run() OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); OPT(opt_copy_propagation); + OPT(opt_cmod_propagation); OPT(opt_cse); OPT(opt_algebraic); OPT(opt_register_coalesce); @@ -1914,6 +2003,8 @@ vec4_visitor::run() opt_set_dependency_control(); + convert_to_hw_regs(); + if (last_scratch > 0) { prog_data->base.total_scratch = 
brw_get_scratch_size(last_scratch * REG_SIZE); @@ -2020,9 +2111,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, return NULL; } - vec4_generator g(compiler, log_data, &prog_data->base, - mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); - assembly = g.generate_assembly(v.cfg, final_assembly_size, shader); + assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, + shader, &prog_data->base, v.cfg, + final_assembly_size); } return assembly; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d861b2e85df..ec8abf49cd8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -52,6 +52,15 @@ extern "C" { extern "C" { #endif +const unsigned * +brw_vec4_generate_assembly(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg, + unsigned *out_assembly_size); + #ifdef __cplusplus } /* extern "C" */ @@ -149,6 +158,7 @@ public: int var_range_start(unsigned v, unsigned n) const; int var_range_end(unsigned v, unsigned n) const; bool virtual_grf_interferes(int a, int b); + bool opt_cmod_propagation(); bool opt_copy_propagation(bool do_constant_prop = true); bool opt_cse_local(bblock_t *block); bool opt_cse(); @@ -158,6 +168,7 @@ public: bool is_dep_ctrl_unsafe(const vec4_instruction *inst); void opt_set_dependency_control(); void opt_schedule_instructions(); + void convert_to_hw_regs(); vec4_instruction *emit(vec4_instruction *inst); @@ -381,117 +392,6 @@ private: unsigned last_scratch; /**< measured in 32-byte (register size) units */ }; - -/** - * The vertex shader code generator. - * - * Translates VS IR to actual i965 assembly code. 
- */ -class vec4_generator -{ -public: - vec4_generator(const struct brw_compiler *compiler, void *log_data, - struct brw_vue_prog_data *prog_data, - void *mem_ctx, - bool debug_flag, - const char *stage_name, - const char *stage_abbrev); - ~vec4_generator(); - - const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size, - const nir_shader *nir); - -private: - void generate_code(const cfg_t *cfg, const nir_shader *nir); - - void generate_math1_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src); - void generate_math2_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_math_gen6(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - - void generate_tex(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg sampler_index); - - void generate_vs_urb_write(vec4_instruction *inst); - void generate_gs_urb_write(vec4_instruction *inst); - void generate_gs_urb_write_allocate(vec4_instruction *inst); - void generate_gs_thread_end(vec4_instruction *inst); - void generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_set_vertex_count(struct brw_reg dst, - struct brw_reg src); - void generate_gs_svb_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_svb_set_destination_index(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src); - void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src); - void generate_gs_prepare_channel_masks(struct brw_reg dst); - void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); - void generate_gs_get_instance_id(struct brw_reg dst); - void generate_gs_ff_sync_set_primitives(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2); - void generate_gs_ff_sync(vec4_instruction *inst, - 
struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_set_primitive_id(struct brw_reg dst); - void generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index); - void generate_scratch_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index); - void generate_scratch_read(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index); - void generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); - void generate_pull_constant_load_gen7(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg surf_index, - struct brw_reg offset); - void generate_set_simd4x2_header_gen9(vec4_instruction *inst, - struct brw_reg dst); - - void generate_get_buffer_size(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index); - - void generate_unpack_flags(struct brw_reg dst); - - const struct brw_compiler *compiler; - void *log_data; /* Passed to compiler->*_log functions */ - - const struct brw_device_info *devinfo; - - struct brw_codegen *p; - - struct brw_vue_prog_data *prog_data; - - void *mem_ctx; - const char *stage_name; - const char *stage_abbrev; - const bool debug_flag; -}; - } /* namespace brw */ #endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp new file mode 100644 index 00000000000..329f24269ce --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp @@ -0,0 +1,157 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +/** @file brw_vec4_cmod_propagation.cpp + * + * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check + * brw_fs_cmod_propagation for further details on the rationale behind this + * optimization. 
+ */ + +#include "brw_vec4.h" +#include "brw_cfg.h" + +namespace brw { + +static bool +opt_cmod_propagation_local(bblock_t *block) +{ + bool progress = false; + int ip = block->end_ip + 1; + + foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) { + ip--; + + if ((inst->opcode != BRW_OPCODE_AND && + inst->opcode != BRW_OPCODE_CMP && + inst->opcode != BRW_OPCODE_MOV) || + inst->predicate != BRW_PREDICATE_NONE || + !inst->dst.is_null() || + inst->src[0].file != GRF || + inst->src[0].abs) + continue; + + if (inst->opcode == BRW_OPCODE_AND && + !(inst->src[1].is_one() && + inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate)) + continue; + + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) + continue; + + if (inst->opcode == BRW_OPCODE_MOV && + inst->conditional_mod != BRW_CONDITIONAL_NZ) + continue; + + bool read_flag = false; + foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) { + if (inst->src[0].in_range(scan_inst->dst, + scan_inst->regs_written)) { + if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || + scan_inst->dst.reg_offset != inst->src[0].reg_offset || + (scan_inst->dst.writemask != WRITEMASK_X && + scan_inst->dst.writemask != WRITEMASK_XYZW) || + (scan_inst->dst.writemask == WRITEMASK_XYZW && + inst->src[0].swizzle != BRW_SWIZZLE_XYZW) || + (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) { + break; + } + + /* CMP's result is the same regardless of dest type. */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + scan_inst->opcode == BRW_OPCODE_CMP && + (inst->dst.type == BRW_REGISTER_TYPE_D || + inst->dst.type == BRW_REGISTER_TYPE_UD)) { + inst->remove(block); + progress = true; + break; + } + + /* If the AND wasn't handled by the previous case, it isn't safe + * to remove it. 
+ */ + if (inst->opcode == BRW_OPCODE_AND) + break; + + /* Comparisons operate differently for ints and floats */ + if (scan_inst->dst.type != inst->dst.type && + (scan_inst->dst.type == BRW_REGISTER_TYPE_F || + inst->dst.type == BRW_REGISTER_TYPE_F)) + break; + + /* If the instruction generating inst's source also wrote the + * flag, and inst is doing a simple .nz comparison, then inst + * is redundant - the appropriate value is already in the flag + * register. Delete inst. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate && + scan_inst->writes_flag()) { + inst->remove(block); + progress = true; + break; + } + + /* Otherwise, try propagating the conditional. */ + enum brw_conditional_mod cond = + inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod) + : inst->conditional_mod; + + if (scan_inst->can_do_cmod() && + ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || + scan_inst->conditional_mod == cond)) { + scan_inst->conditional_mod = cond; + inst->remove(block); + progress = true; + } + break; + } + + if (scan_inst->writes_flag()) + break; + + read_flag = read_flag || scan_inst->reads_flag(); + } + } + + return progress; +} + +bool +vec4_visitor::opt_cmod_propagation() +{ + bool progress = false; + + foreach_block_reverse(block, cfg) { + progress = opt_cmod_propagation_local(block) || progress; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 8fc7a365bfc..284e0a8d0a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse(vec4_instruction, inst, block) { - if (inst->dst.file == GRF && !inst->has_side_effects()) { + if 
((inst->dst.file == GRF && !inst->has_side_effects()) || + (inst->dst.is_null() && inst->writes_flag())){ bool result_live[4] = { false }; - for (unsigned i = 0; i < inst->regs_written; i++) { - for (int c = 0; c < 4; c++) - result_live[c] |= BITSET_TEST( - live, var_from_reg(alloc, offset(inst->dst, i), c)); + if (inst->dst.file == GRF) { + for (unsigned i = 0; i < inst->regs_written; i++) { + for (int c = 0; c < 4; c++) + result_live[c] |= BITSET_TEST( + live, var_from_reg(alloc, offset(inst->dst, i), c)); + } + } else { + for (unsigned c = 0; c < 4; c++) + result_live[c] = BITSET_TEST(flag_live, c); } /* If the instruction can't do writemasking, then it's all or @@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate() } if (inst->dst.is_null() && inst->writes_flag()) { - if (!BITSET_TEST(flag_live, 0)) { + bool combined_live = false; + for (unsigned c = 0; c < 4; c++) + combined_live |= BITSET_TEST(flag_live, c); + + if (!combined_live) { inst->opcode = BRW_OPCODE_NOP; progress = true; continue; @@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate() } if (inst->writes_flag()) { - BITSET_CLEAR(flag_live, 0); + for (unsigned c = 0; c < 4; c++) + BITSET_CLEAR(flag_live, c); } for (int i = 0; i < 3; i++) { @@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate() } } - if (inst->reads_flag()) { - BITSET_SET(flag_live, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c)) { + BITSET_SET(flag_live, c); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index a84f6c47471..8bc21df5ffc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -20,146 +20,17 @@ * IN THE SOFTWARE. 
*/ -#include <ctype.h> #include "glsl/glsl_parser_extras.h" #include "brw_vec4.h" #include "brw_cfg.h" -extern "C" { -#include "brw_eu.h" -#include "main/macros.h" -#include "program/prog_print.h" -#include "program/prog_parameter.h" -}; +using namespace brw; -namespace brw { - -struct brw_reg -vec4_instruction::get_dst(unsigned gen) -{ - struct brw_reg brw_reg; - - switch (dst.file) { - case GRF: - brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); - brw_reg = retype(brw_reg, dst.type); - brw_reg.dw1.bits.writemask = dst.writemask; - break; - - case MRF: - assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen)); - brw_reg = brw_message_reg(dst.reg + dst.reg_offset); - brw_reg = retype(brw_reg, dst.type); - brw_reg.dw1.bits.writemask = dst.writemask; - break; - - case HW_REG: - assert(dst.type == dst.fixed_hw_reg.type); - brw_reg = dst.fixed_hw_reg; - break; - - case BAD_FILE: - brw_reg = brw_null_reg(); - break; - - default: - unreachable("not reached"); - } - return brw_reg; -} - -struct brw_reg -vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) -{ - struct brw_reg brw_reg; - - switch (src[i].file) { - case GRF: - brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); - brw_reg = retype(brw_reg, src[i].type); - brw_reg.dw1.bits.swizzle = src[i].swizzle; - if (src[i].abs) - brw_reg = brw_abs(brw_reg); - if (src[i].negate) - brw_reg = negate(brw_reg); - break; - - case IMM: - switch (src[i].type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(src[i].fixed_hw_reg.dw1.f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(src[i].fixed_hw_reg.dw1.d); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud); - break; - case BRW_REGISTER_TYPE_VF: - brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud); - break; - default: - unreachable("not reached"); - } - break; - - case UNIFORM: - brw_reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + - (src[i].reg + src[i].reg_offset) 
/ 2, - ((src[i].reg + src[i].reg_offset) % 2) * 4), - 0, 4, 1); - brw_reg = retype(brw_reg, src[i].type); - brw_reg.dw1.bits.swizzle = src[i].swizzle; - if (src[i].abs) - brw_reg = brw_abs(brw_reg); - if (src[i].negate) - brw_reg = negate(brw_reg); - - /* This should have been moved to pull constants. */ - assert(!src[i].reladdr); - break; - - case HW_REG: - assert(src[i].type == src[i].fixed_hw_reg.type); - brw_reg = src[i].fixed_hw_reg; - break; - - case BAD_FILE: - /* Probably unused. */ - brw_reg = brw_null_reg(); - break; - case ATTR: - default: - unreachable("not reached"); - } - - return brw_reg; -} - -vec4_generator::vec4_generator(const struct brw_compiler *compiler, - void *log_data, - struct brw_vue_prog_data *prog_data, - void *mem_ctx, - bool debug_flag, - const char *stage_name, - const char *stage_abbrev) - : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), - prog_data(prog_data), - mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), - debug_flag(debug_flag) -{ - p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(devinfo, p, mem_ctx); -} - -vec4_generator::~vec4_generator() -{ -} - -void -vec4_generator::generate_math1_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src) +static void +generate_math1_gen4(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) { gen4_math(p, dst, @@ -178,11 +49,12 @@ check_gen6_math_src_arg(struct brw_reg src) assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW); } -void -vec4_generator::generate_math_gen6(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_math_gen6(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* Can't do writemask because math can't be align16. 
*/ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); @@ -196,11 +68,12 @@ vec4_generator::generate_math_gen6(vec4_instruction *inst, brw_set_default_access_mode(p, BRW_ALIGN_16); } -void -vec4_generator::generate_math2_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_math2_gen4(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 * "Message Payload": @@ -229,12 +102,15 @@ vec4_generator::generate_math2_gen4(vec4_instruction *inst, BRW_MATH_PRECISION_FULL); } -void -vec4_generator::generate_tex(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg sampler_index) +static void +generate_tex(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg sampler_index) { + const struct brw_device_info *devinfo = p->devinfo; int msg_type = -1; if (devinfo->gen >= 5) { @@ -440,8 +316,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, } } -void -vec4_generator::generate_vs_urb_write(vec4_instruction *inst) +static void +generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst) { brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -454,8 +330,8 @@ vec4_generator::generate_vs_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_urb_write(vec4_instruction *inst) +static void +generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst) { struct brw_reg src = brw_message_reg(inst->base_mrf); brw_urb_WRITE(p, @@ -469,14 +345,14 @@ vec4_generator::generate_gs_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst) +static void +generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst) { struct 
brw_reg src = brw_message_reg(inst->base_mrf); /* We pass the temporary passed in src0 as the writeback register */ brw_urb_WRITE(p, - inst->get_src(this->prog_data, 0), /* dest */ + inst->src[0].fixed_hw_reg, /* dest */ inst->base_mrf, /* starting mrf reg nr */ src, BRW_URB_WRITE_ALLOCATE_COMPLETE, @@ -489,14 +365,13 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst) brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0), - get_element_ud(inst->get_src(this->prog_data, 0), 0)); - brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0), + get_element_ud(inst->src[0].fixed_hw_reg, 0)); brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_thread_end(vec4_instruction *inst) +static void +generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst) { struct brw_reg src = brw_message_reg(inst->base_mrf); brw_urb_WRITE(p, @@ -510,10 +385,11 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_set_write_offset(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.3): @@ -536,29 +412,29 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - assert(devinfo->gen >= 7 && + assert(p->devinfo->gen >= 7 && src1.file == BRW_IMMEDIATE_VALUE && src1.type == BRW_REGISTER_TYPE_UD && src1.dw1.ud <= USHRT_MAX); - if (src0.file == IMM) { + if (src0.file == BRW_IMMEDIATE_VALUE) { brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3), 
brw_imm_ud(src0.dw1.ud * src1.dw1.ud)); } else { brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), retype(src1, BRW_REGISTER_TYPE_UW)); } - brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_set_vertex_count(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - if (devinfo->gen >= 8) { + if (p->devinfo->gen >= 8) { /* Move the vertex count into the second MRF for the EOT write. */ brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD), src); @@ -580,16 +456,17 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); - brw_set_default_access_mode(p, BRW_ALIGN_16); } brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_svb_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_svb_write(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { int binding = inst->sol_binding; bool final_write = inst->sol_final_write; @@ -623,12 +500,12 @@ vec4_generator::generate_gs_svb_write(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_svb_set_destination_index(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) { - int vertex = inst->sol_vertex; brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -637,8 +514,10 @@ vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst, 
brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src) +static void +generate_gs_set_dword_2(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -647,8 +526,9 @@ vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) +static void +generate_gs_prepare_channel_masks(struct brw_codegen *p, + struct brw_reg dst) { /* We want to left shift just DWORD 4 (the x component belonging to the * second geometry shader invocation) by 4 bits. So generate the @@ -664,9 +544,10 @@ vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_set_channel_masks(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.5): @@ -727,8 +608,9 @@ vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) +static void +generate_gs_get_instance_id(struct brw_codegen *p, + struct brw_reg dst) { /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT * and store into dst.0 & dst.4. 
So generate the instruction: @@ -744,11 +626,12 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2) +static void +generate_gs_ff_sync_set_primitives(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1, + struct brw_reg src2) { brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -765,11 +648,12 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_ff_sync(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_ff_sync(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* This opcode uses an implied MRF register for: * - the header of the ff_sync message. 
And as such it is expected to be @@ -811,8 +695,8 @@ vec4_generator::generate_gs_ff_sync(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst) +static void +generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst) { /* In gen6, PrimitiveID is delivered in R0.1 of the payload */ struct brw_reg src = brw_vec8_grf(0, 0); @@ -823,13 +707,14 @@ vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index) +static void +generate_oword_dual_block_offsets(struct brw_codegen *p, + struct brw_reg m1, + struct brw_reg index) { int second_vertex_offset; - if (devinfo->gen >= 6) + if (p->devinfo->gen >= 6) second_vertex_offset = 1; else second_vertex_offset = 16; @@ -860,8 +745,9 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, brw_pop_insn_state(p); } -void -vec4_generator::generate_unpack_flags(struct brw_reg dst) +static void +generate_unpack_flags(struct brw_codegen *p, + struct brw_reg dst) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -878,16 +764,18 @@ vec4_generator::generate_unpack_flags(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_scratch_read(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index) +static void +generate_scratch_read(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) { + const struct brw_device_info *devinfo = p->devinfo; struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); - generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), index); uint32_t msg_type; @@ -906,7 +794,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, brw_set_dest(p, send, dst); 
brw_set_src0(p, send, header); if (devinfo->gen < 6) - brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); + brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, @@ -917,12 +805,14 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, 1 /* rlen */); } -void -vec4_generator::generate_scratch_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index) +static void +generate_scratch_write(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) { + const struct brw_device_info *devinfo = p->devinfo; struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; @@ -933,7 +823,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, gen6_resolve_implied_move(p, &header, inst->base_mrf); - generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), index); brw_MOV(p, @@ -990,12 +880,15 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, write_commit); } -void -vec4_generator::generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) +static void +generate_pull_constant_load(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index, + struct brw_reg offset) { + const struct brw_device_info *devinfo = p->devinfo; assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; @@ -1036,13 +929,15 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, surf_index); } -void -vec4_generator::generate_get_buffer_size(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, 
- struct brw_reg surf_index) +static void +generate_get_buffer_size(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg surf_index) { - assert(devinfo->gen >= 7); + assert(p->devinfo->gen >= 7); assert(surf_index.type == BRW_REGISTER_TYPE_UD && surf_index.file == BRW_IMMEDIATE_VALUE); @@ -1062,11 +957,13 @@ vec4_generator::generate_get_buffer_size(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); } -void -vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg surf_index, - struct brw_reg offset) +static void +generate_pull_constant_load_gen7(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset) { assert(surf_index.type == BRW_REGISTER_TYPE_UD); @@ -1123,9 +1020,10 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, } } -void -vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, - struct brw_reg dst) +static void +generate_set_simd4x2_header_gen9(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -1140,9 +1038,18 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) +static void +generate_code(struct brw_codegen *p, + const struct brw_compiler *compiler, + void *log_data, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg) { + const struct brw_device_info *devinfo = p->devinfo; + const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage); + bool debug_flag = INTEL_DEBUG & + intel_debug_flag_for_shader_stage(nir->stage); struct annotation_info annotation; 
memset(&annotation, 0, sizeof(annotation)); int loop_count = 0; @@ -1154,9 +1061,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { - src[i] = inst->get_src(this->prog_data, i); + src[i] = inst->src[i].fixed_hw_reg; } - dst = inst->get_dst(devinfo->gen); + dst = inst->dst.fixed_hw_reg; brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); @@ -1383,9 +1290,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (devinfo->gen == 6) { - generate_math_gen6(inst, dst, src[0], brw_null_reg()); + generate_math_gen6(p, inst, dst, src[0], brw_null_reg()); } else { - generate_math1_gen4(inst, dst, src[0]); + generate_math1_gen4(p, inst, dst, src[0]); } break; @@ -1396,9 +1303,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) if (devinfo->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (devinfo->gen == 6) { - generate_math_gen6(inst, dst, src[0], src[1]); + generate_math_gen6(p, inst, dst, src[0], src[1]); } else { - generate_math2_gen4(inst, dst, src[0], src[1]); + generate_math2_gen4(p, inst, dst, src[0], src[1]); } break; @@ -1412,92 +1319,92 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: - generate_vs_urb_write(inst); + generate_vs_urb_write(p, inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: - generate_scratch_read(inst, dst, src[0]); + generate_scratch_read(p, inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - generate_scratch_write(inst, dst, 
src[0], src[1]); + generate_scratch_write(p, inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: - generate_pull_constant_load(inst, dst, src[0], src[1]); + generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); + generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: - generate_set_simd4x2_header_gen9(inst, dst); + generate_set_simd4x2_header_gen9(p, inst, dst); break; case VS_OPCODE_GET_BUFFER_SIZE: - generate_get_buffer_size(inst, dst, src[0], src[1]); + generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: - generate_gs_urb_write(inst); + generate_gs_urb_write(p, inst); break; case GS_OPCODE_URB_WRITE_ALLOCATE: - generate_gs_urb_write_allocate(inst); + generate_gs_urb_write_allocate(p, inst); break; case GS_OPCODE_SVB_WRITE: - generate_gs_svb_write(inst, dst, src[0], src[1]); + generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]); break; case GS_OPCODE_SVB_SET_DST_INDEX: - generate_gs_svb_set_destination_index(inst, dst, src[0]); + generate_gs_svb_set_destination_index(p, inst, dst, src[0]); break; case GS_OPCODE_THREAD_END: - generate_gs_thread_end(inst); + generate_gs_thread_end(p, inst); break; case GS_OPCODE_SET_WRITE_OFFSET: - generate_gs_set_write_offset(dst, src[0], src[1]); + generate_gs_set_write_offset(p, dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: - generate_gs_set_vertex_count(dst, src[0]); + generate_gs_set_vertex_count(p, dst, src[0]); break; case GS_OPCODE_FF_SYNC: - generate_gs_ff_sync(inst, dst, src[0], src[1]); + generate_gs_ff_sync(p, inst, dst, src[0], src[1]); break; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: - generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]); + generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]); break; case 
GS_OPCODE_SET_PRIMITIVE_ID: - generate_gs_set_primitive_id(dst); + generate_gs_set_primitive_id(p, dst); break; case GS_OPCODE_SET_DWORD_2: - generate_gs_set_dword_2(dst, src[0]); + generate_gs_set_dword_2(p, dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: - generate_gs_prepare_channel_masks(dst); + generate_gs_prepare_channel_masks(p, dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: - generate_gs_set_channel_masks(dst, src[0]); + generate_gs_set_channel_masks(p, dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: - generate_gs_get_instance_id(dst); + generate_gs_get_instance_id(p, dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: @@ -1556,7 +1463,7 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - generate_unpack_flags(dst); + generate_unpack_flags(p, dst); break; case VEC4_OPCODE_MOV_BYTES: { @@ -1651,10 +1558,10 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) nir->info.label ? nir->info.label : "unnamed", _mesa_shader_stage_to_string(nir->stage), nir->info.name); - fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d" - " bytes (%.0f%%)\n", + fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles." 
+ "Compacted %d to %d bytes (%.0f%%)\n", stage_abbrev, - before_size / 16, loop_count, before_size, after_size, + before_size / 16, loop_count, cfg->cycle_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, @@ -1663,21 +1570,27 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) } compiler->shader_debug_log(log_data, - "%s vec4 shader: %d inst, %d loops, " + "%s vec4 shader: %d inst, %d loops, %u cycles, " "compacted %d to %d bytes.\n", - stage_abbrev, before_size / 16, loop_count, + stage_abbrev, before_size / 16, + loop_count, cfg->cycle_count, before_size, after_size); } -const unsigned * -vec4_generator::generate_assembly(const cfg_t *cfg, - unsigned *assembly_size, - const nir_shader *nir) +extern "C" const unsigned * +brw_vec4_generate_assembly(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg, + unsigned *out_assembly_size) { + struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen); + brw_init_codegen(compiler->devinfo, p, mem_ctx); brw_set_default_access_mode(p, BRW_ALIGN_16); - generate_code(cfg, nir); - return brw_get_program(p, assembly_size); -} + generate_code(p, compiler, log_data, nir, prog_data, cfg); -} /* namespace brw */ + return brw_get_program(p, out_assembly_size); +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 9402489e628..cfb5cd95cb1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -768,7 +768,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, output_size_bytes += 32; assert(output_size_bytes >= 1); - int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; + unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; if 
(compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) @@ -824,9 +824,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { - vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, - INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); - return g.generate_assembly(v.cfg, final_assembly_size, shader); + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, + shader, &prog_data->base, v.cfg, + final_assembly_size); } } } @@ -875,9 +875,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (error_str) *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { - vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, - INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); - ret = g.generate_assembly(gs->cfg, final_assembly_size, shader); + ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, + &prog_data->base, gs->cfg, + final_assembly_size); } delete gs; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 678237901f2..aa9a6572eee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use() } } } - if (inst->reads_flag()) { - if (!BITSET_TEST(bd->flag_def, 0)) { - BITSET_SET(bd->flag_use, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c) && + !BITSET_TEST(bd->flag_def, c)) { + BITSET_SET(bd->flag_use, c); } } @@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use() } } if (inst->writes_flag()) { - if (!BITSET_TEST(bd->flag_use, 0)) { - BITSET_SET(bd->flag_def, 0); + for (unsigned c = 0; c < 4; c++) { + if ((inst->dst.writemask & (1 << c)) && + !BITSET_TEST(bd->flag_use, c)) { + 
BITSET_SET(bd->flag_def, c); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index e79a9f3b5b9..1fb1773f856 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -193,7 +193,9 @@ vec4_visitor::nir_emit_if(nir_if *if_stmt) vec4_instruction *inst = emit(MOV(dst_null_d(), condition)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(IF(BRW_PREDICATE_NORMAL)); + /* We can just predicate based on the X channel, as the condition only + * goes on its own line */ + emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X)); nir_emit_cf_list(&if_stmt->then_list); @@ -806,6 +808,16 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_shader_clock: { + /* We cannot do anything if there is an event, so ignore it for now */ + const src_reg shader_clock = get_timestamp(); + const enum brw_reg_type type = brw_type_for_base_type(glsl_type::uvec2_type); + + dest = get_nir_dest(instr->dest, type); + emit(MOV(dest, shader_clock)); + break; + } + default: unreachable("Unknown intrinsic"); } @@ -1144,26 +1156,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ball_iequal3: case nir_op_ball_fequal4: case nir_op_ball_iequal4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); - - switch (instr->op) { - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - tmp.writemask = WRITEMASK_XY; - break; - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - tmp.writemask = WRITEMASK_XYZ; - break; - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - tmp.writemask = WRITEMASK_XYZW; - break; - default: - unreachable("not reached"); - } + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - emit(CMP(tmp, op[0], op[1], + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); emit(MOV(dst, src_reg(0))); inst = emit(MOV(dst, src_reg(~0))); @@ 
-1177,26 +1173,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bany_inequal3: case nir_op_bany_fnequal4: case nir_op_bany_inequal4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - switch (instr->op) { - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - tmp.writemask = WRITEMASK_XY; - break; - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - tmp.writemask = WRITEMASK_XYZ; - break; - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: - tmp.writemask = WRITEMASK_XYZW; - break; - default: - unreachable("not reached"); - } - - emit(CMP(tmp, op[0], op[1], + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); emit(MOV(dst, src_reg(0))); @@ -1321,26 +1301,18 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ufind_msb: case nir_op_ifind_msb: { - src_reg temp = src_reg(this, glsl_type::uint_type); - - inst = emit(FBH(dst_reg(temp), op[0])); - inst->dst.writemask = WRITEMASK_XYZW; + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); /* FBH counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ + src_reg src(dst); + emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ)); - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. 
*/ - temp.swizzle = BRW_SWIZZLE_NOOP; - emit(MOV(dst, temp)); - - src_reg src_tmp = src_reg(dst); - emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); - - src_tmp.negate = true; - inst = emit(ADD(dst, src_tmp, src_reg(31))); + inst = emit(ADD(dst, src, src_reg(31))); inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; } @@ -1461,11 +1433,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bany2: case nir_op_bany3: case nir_op_bany4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); - tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]); - - emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); + emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0), + BRW_CONDITIONAL_NZ)); emit(MOV(dst, src_reg(0))); inst = emit(MOV(dst, src_reg(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6d155285820..92b089d7ff6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -883,6 +883,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op, uint32_t sampler, src_reg sampler_reg) { + /* The sampler can only meaningfully compute LOD for fragment shader + * messages. For all other stages, we change the opcode to TXL and hardcode + * the LOD to 0. + * + * textureQueryLevels() is implemented in terms of TXS so we need to pass a + * valid LOD argument. 
+ */ + if (op == ir_tex || op == ir_query_levels) { + assert(lod.file == BAD_FILE); + lod = src_reg(0.0f); + } + enum opcode opcode; switch (op) { case ir_tex: opcode = SHADER_OPCODE_TXL; break; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 5db4b3a86af..0b805b1c0c4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -311,7 +311,8 @@ brw_vs_populate_key(struct brw_context *brw, key->program_string_id = vp->id; if (ctx->Transform.ClipPlanesEnabled != 0 && - ctx->API == API_OPENGL_COMPAT && + (ctx->API == API_OPENGL_COMPAT || + ctx->API == API_OPENGLES) && vp->program.Base.ClipDistanceArraySize == 0) { key->nr_userclip_plane_consts = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index f65258a52a5..d7473845c72 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_VS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->vs.base, - &brw->vs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], + &brw->vs.base, &brw->vs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 6ebe6481c32..f88f8d59196 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = { void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data) { @@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw, uint32_t *surf_offsets = 
&stage_state->surf_offset[prog_data->binding_table.abo_start]; - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { - struct gl_atomic_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = intel_bufferobj_buffer( - brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); - - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, - binding->Offset, BRW_SURFACEFORMAT_RAW, - bo->size - binding->Offset, 1, true); - } + if (shader && shader->NumAtomicBuffers) { + for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) { + struct gl_atomic_buffer_binding *binding = + &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding]; + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = intel_bufferobj_buffer( + brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); + + brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, + binding->Offset, BRW_SURFACEFORMAT_RAW, + bo->size - binding->Offset, 1, true); + } - if (prog->NumAtomicBuffers) brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } } static void @@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_FS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->wm.base, - &brw->wm.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.base, &brw->wm.prog_data->base); } } @@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_CS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->cs.base, - &brw->cs.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + &brw->cs.base, &brw->cs.prog_data->base); } } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 
4068f2844a2..2634e6ba6fd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -60,6 +60,23 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset, /* Find the VUE slot for this attribute. */ int slot = vue_map->varying_to_slot[fs_attr]; + /* Viewport and Layer are stored in the VUE header. We need to override + * them to zero if earlier stages didn't write them, as GL requires that + * they read back as zero when not explicitly set. + */ + if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { + unsigned override = + ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W | + ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT; + + if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) + override |= ATTRIBUTE_0_OVERRIDE_Y; + if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) + override |= ATTRIBUTE_0_OVERRIDE_Z; + + return override; + } + /* If there was only a back color written but not front, use back * as the color instead of undefined */ @@ -159,14 +176,30 @@ calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, uint32_t *flat_enables, - uint32_t *urb_entry_read_length) + uint32_t *urb_entry_read_length, + uint32_t *urb_entry_read_offset) { - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; uint32_t max_source_attr = 0; *point_sprite_enables = 0; *flat_enables = 0; + *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; + + /* BRW_NEW_FRAGMENT_PROGRAM + * + * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in + * the full vertex header. 
Otherwise, we can program the SF to start + * reading at an offset of 1 (2 varying slots) to skip unnecessary data: + * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5 + * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+ + */ + + bool fs_needs_vue_header = brw->fragment_program->Base.InputsRead & + (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + + *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1; + /* _NEW_LIGHT */ bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT; @@ -228,7 +261,7 @@ calculate_attr_overrides(const struct brw_context *brw, /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ uint16_t attr_override = point_sprite ? 0 : get_attr_override(&brw->vue_map_geom_out, - urb_entry_read_offset, attr, + *urb_entry_read_offset, attr, brw->ctx.VertexProgram._TwoSideEnabled, &max_source_attr); @@ -276,7 +309,6 @@ upload_sf_state(struct brw_context *brw) bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; float point_size; uint16_t attr_overrides[16]; uint32_t point_sprite_origin; @@ -411,8 +443,10 @@ upload_sf_state(struct brw_context *brw) * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA */ uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length); + &flat_enables, &urb_entry_read_length, + &urb_entry_read_offset); dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 6aeb0cb243f..2d7c04f4ad2 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -285,3 +285,34 @@ const struct brw_tracked_state gen7_cs_push_constants = { 
}, .emit = gen7_upload_cs_push_constants, }; + +/** + * Creates a new CS constant buffer reflecting the current CS program's + * constants, if needed by the CS program. + */ +static void +brw_upload_cs_pull_constants(struct brw_context *brw) +{ + struct brw_stage_state *stage_state = &brw->cs.base; + + /* BRW_NEW_COMPUTE_PROGRAM */ + struct brw_compute_program *cp = + (struct brw_compute_program *) brw->compute_program; + + /* BRW_NEW_CS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = &brw->cs.prog_data->base; + + /* _NEW_PROGRAM_CONSTANTS */ + brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program.Base, + stage_state, prog_data, true); +} + +const struct brw_tracked_state brw_cs_pull_constants = { + .dirty = { + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = BRW_NEW_BATCH | + BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_CS_PROG_DATA, + }, + .emit = brw_upload_cs_pull_constants, +}; diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 698b3d491bc..b1f13aceba4 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -40,7 +40,6 @@ upload_sbe_state(struct brw_context *brw) uint32_t point_sprite_enables; uint32_t flat_enables; int i; - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; uint16_t attr_overrides[16]; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); @@ -65,8 +64,10 @@ upload_sbe_state(struct brw_context *brw) * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA */ uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length); + &flat_enables, &urb_entry_read_length, + &urb_entry_read_offset); dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8f0507413a7..10e433b1d59 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw, !brw_color_buffer_write_enabled(brw)) dw1 |= GEN8_PSX_SHADER_HAS_UAV; + if (prog_data->computed_stencil) { + assert(brw->gen >= 9); + dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL; + } + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 6b655ee493e..8b6f31f3be6 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -37,6 +37,7 @@ upload_sbe(struct brw_context *brw) uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint16_t attr_overrides[VARYING_SLOT_MAX]; uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; uint32_t point_sprite_enables; uint32_t flat_enables; int sbe_cmd_length; @@ -66,7 +67,8 @@ upload_sbe(struct brw_context *brw) calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, &flat_enables, - &urb_entry_read_length); + &urb_entry_read_length, + &urb_entry_read_offset); /* Typically, the URB entry read length and offset should be programmed in * 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active stage @@ -78,7 +80,7 @@ upload_sbe(struct brw_context *brw) */ dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | + urb_entry_read_offset << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET; diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 18b86652fd2..140a6544983 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ 
-183,6 +183,14 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, } static void +gen8_emit_fast_clear_color(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t *surf) +{ + surf[7] |= mt->fast_clear_color_value; +} + +static void gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *mt, GLenum target, @@ -284,11 +292,10 @@ gen8_emit_texture_surface_state(struct brw_context *brw, SET_FIELD((aux_mt->pitch / tile_w) - 1, GEN8_SURFACE_AUX_PITCH) | aux_mode; - } else { - surf[6] = 0; } - surf[7] = mt->fast_clear_color_value | + gen8_emit_fast_clear_color(brw, mt, surf); + surf[7] |= SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) | @@ -302,11 +309,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, aux_mt->bo, 0, I915_GEM_DOMAIN_SAMPLER, (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); - } else { - surf[10] = 0; - surf[11] = 0; } - surf[12] = 0; /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->batch.bo, @@ -514,15 +517,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, SET_FIELD((aux_mt->pitch / tile_w) - 1, GEN8_SURFACE_AUX_PITCH) | aux_mode; - } else { - surf[6] = 0; } - surf[7] = mt->fast_clear_color_value | - SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + gen8_emit_fast_clear_color(brw, mt, surf); + surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); assert(mt->offset % mt->cpp == 0); *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */ @@ -533,11 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, offset + 
10 * 4, aux_mt->bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } else { - surf[10] = 0; - surf[11] = 0; } - surf[12] = 0; drm_intel_bo_emit_reloc(brw->batch.bo, offset + 8 * 4, diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index f7c02c8a38d..c00d2e786f3 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -73,6 +73,8 @@ static const struct debug_control debug_control[] = { { "spill_fs", DEBUG_SPILL_FS }, { "spill_vec4", DEBUG_SPILL_VEC4 }, { "cs", DEBUG_CS }, + { "hex", DEBUG_HEX }, + { "nocompact", DEBUG_NO_COMPACTION }, { NULL, 0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 0a6e1b90b98..98bd7e93956 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -67,6 +67,8 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_SPILL_FS (1ull << 31) #define DEBUG_SPILL_VEC4 (1ull << 32) #define DEBUG_CS (1ull << 33) +#define DEBUG_HEX (1ull << 34) +#define DEBUG_NO_COMPACTION (1ull << 35) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 3f9afd16c71..4643ea3e87b 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -287,6 +287,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_draw_buffers_blend = true; ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shading_language_420pack = true; ctx->Extensions.ARB_shading_language_packing = true; @@ -324,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_framebuffer_no_attachments = true; ctx->Extensions.ARB_gpu_shader5 = true; 
ctx->Extensions.ARB_shader_atomic_counters = true; + ctx->Extensions.ARB_shader_clock = true; ctx->Extensions.ARB_shader_image_load_store = true; ctx->Extensions.ARB_shader_image_size = true; ctx->Extensions.ARB_shader_texture_image_samples = true; @@ -358,6 +360,7 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 9) { ctx->Extensions.KHR_texture_compression_astc_ldr = true; ctx->Extensions.KHR_texture_compression_astc_hdr = true; + ctx->Extensions.ARB_shader_stencil_export = true; } if (ctx->API == API_OPENGL_CORE) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 5a6b0dd1ec5..3a4a53a07e6 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -343,19 +343,15 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, if (image->planar_format && image->planar_format->nplanes > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetRenderbufferStorage(planar buffers are not " - "supported as render targets."); + "supported as render targets.)"); return; } /* __DRIimage is opaque to the core so it has to be checked here */ - switch (image->format) { - case MESA_FORMAT_R8G8B8A8_UNORM: + if (!brw->format_supported_as_render_target[image->format]) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetRenderbufferStorage(unsupported image format"); + "glEGLImageTargetRenderbufferStorage(unsupported image format)"); return; - break; - default: - break; } irb = intel_renderbuffer(rb); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 590c45d93ea..fb95fb629ad 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen) } } -static int +/** + * Return the revision (generally the revid field of the PCI header) of the + * graphics device. 
+ * + * XXX: This function is useful to keep around even if it is not currently in + * use. It is necessary for new platforms and revision specific workarounds or + * features. Please don't remove it so that we know it at least continues to + * build. + */ +static __attribute__((__unused__)) int brw_get_revision(int fd) { struct drm_i915_getparam gp; @@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return false; intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr); - intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID, - brw_get_revision(psp->fd)); + intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID); if (!intelScreen->devinfo) return false; diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 5f80f90a91d..62d39f70ec4 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -84,7 +84,7 @@ instruction(bblock_t *block, int num) static bool cmod_propagation(fs_visitor *v) { - const bool print = false; + const bool print = getenv("TEST_DEBUG"); if (print) { fprintf(stderr, "= Before =\n"); diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp new file mode 100644 index 00000000000..9aa2fcc7907 --- /dev/null +++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp @@ -0,0 +1,822 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Based on test_fs_cmod_propagation.cpp + */ + +#include <gtest/gtest.h> +#include "brw_vec4.h" +#include "brw_vec4_builder.h" +#include "brw_cfg.h" +#include "program/program.h" + +using namespace brw; + +class cmod_propagation_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct brw_device_info *devinfo; + struct gl_context *ctx; + struct gl_shader_program *shader_prog; + struct brw_vertex_program *vp; + vec4_visitor *v; +}; + +class cmod_propagation_vec4_visitor : public vec4_visitor +{ +public: + cmod_propagation_vec4_visitor(struct brw_compiler *compiler, + nir_shader *shader) + : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, + false, -1) {} + +protected: + /* Dummy implementation for pure virtual methods */ + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type) + { + unreachable("Not reached"); + } + + virtual void setup_payload() + { + unreachable("Not reached"); + } + + virtual void emit_prolog() + { + unreachable("Not reached"); + } + + virtual void emit_program_code() + { + unreachable("Not reached"); + } + + virtual void emit_thread_end() + { + unreachable("Not reached"); + } + + virtual void emit_urb_write_header(int mrf) + { + unreachable("Not reached"); + } + + virtual vec4_instruction 
*emit_urb_write_opcode(bool complete) + { + unreachable("Not reached"); + } +}; + + +void cmod_propagation_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; + + vp = ralloc(NULL, struct brw_vertex_program); + + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); + + v = new cmod_propagation_vec4_visitor(compiler, shader); + + _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); + + devinfo->gen = 4; +} + +static vec4_instruction * +instruction(bblock_t *block, int num) +{ + vec4_instruction *inst = (vec4_instruction *)block->start(); + for (int i = 0; i < num; i++) { + inst = (vec4_instruction *)inst->next; + } + return inst; +} + +static bool +cmod_propagation(vec4_visitor *v) +{ + const bool print = getenv("TEST_DEBUG"); + + if (print) { + fprintf(stderr, "= Before =\n"); + v->dump_instructions(); + } + + bool ret = v->opt_cmod_propagation(); + + if (print) { + fprintf(stderr, "\n= After =\n"); + v->dump_instructions(); + } + + return ret; +} + +TEST_F(cmod_propagation_test, basic) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.ADD(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest.x src0.xxxx src1.xxxx + * 1: cmp.ge.f0 null.x dest.xxxx 0.0f + * + * = After = + * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + 
ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, basic_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest.x src0 src1 + * 1: cmp.ge.f0 null.xyzw dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andz_one) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + src_reg one(1); + + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.null_reg_d(), src_reg(dest), one)); + + /* = Before = + * 0: cmp.l.f0 dest:F src0:F 0F + * 1: and.z.f0 null:D dest:D 1D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, 
block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, non_cmod_instruction) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::uint_type); + src_reg src0 = src_reg(v, glsl_type::uint_type); + src_reg zero(0u); + bld.FBL(dest, src0); + bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: fbl dest src0 + * 1: cmp.ge.f0 null dest 0u + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_write) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); + bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest src0 src1 + * 1: cmp.ge.f0 null src2 0.0f + * 2: cmp.ge.f0 null dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + 
ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_read) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest0 = dst_reg(v, glsl_type::float_type); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest0, src0, src1); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + * 2: cmp.ge.f0 null dest0 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_dest_write) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, 
glsl_type::vec2_type); + src_reg zero(0.0f); + bld.ADD(offset(dest, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dest, src2) + ->regs_written = 4; + bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest+2 src0 src1 + * 1: tex rlen 4 dest+0 src2 + * 2: cmp.ge.f0 null dest+2 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_read_same_value) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest0 = dst_reg(v, glsl_type::float_type); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add.ge.f0 dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + * 2: cmp.ge.f0 null.x dest0 0.0f + * + * = After = + * 0: add.ge.f0 dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + +
EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); +} + +TEST_F(cmod_propagation_test, negate) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + src_reg tmp_src = src_reg(dest); + tmp_src.negate = true; + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest src0 src1 + * 1: cmp.ge.f0 null.x -dest 0.0f + * + * = After = + * 0: add.le.f0 dest src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, movnz) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(dest_null, src_reg(dest))); + + /* = Before = + * + * 0: cmp.l.f0 dest:F src0:F src1:F + * 1: mov.nz.f0 null.x dest:F + * + * = 
After = + * 0: cmp.l.f0 dest src0:F src1:F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, different_types_cmod_with_zero) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::int_type); + src_reg src1 = src_reg(v, glsl_type::int_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero, + BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest:D src0:D src1:D + * 1: cmp.ge.f0 null:F dest:F 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andnz_non_one) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + src_reg nonone(38); + + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), src_reg(dest), nonone)); + + /* = Before = + * 0: cmp.l.f0 dest:F src0:F 0F + * 1: and.nz.f0 null:D dest:D 38D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 
= v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +/* Note that the "basic" test uses glsl_type::float_type registers, while this + * one uses glsl_type::vec4_type registers. */ +TEST_F(cmod_propagation_test, basic_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + + bld.MUL(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest.xyzw src0.xyzw src1.xyzw + * 1: cmp.nz.f0.0 null.xyzw dest.xyzw 0.0f + * + * = After = + * 0: mul.nz.f0.0 dest.xyzw src0.xyzw src1.xyzw + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + + bld.MUL(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest.x
src0 src1 + * 1: cmp.nz.f0.0 null dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mad_one_component_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg src2 = src_reg(v, glsl_type::vec4_type); + src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; + src2.negate = true; + src_reg zero(0.0f); + src_reg tmp(dest); + tmp.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.MAD(dest, src0, src1, src2); + bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L); + + /* = Before = + * + * 0: mad dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + * 1: cmp.l.f0.0 null.x:F dest.xxxx:F 0.0f + * + * = After = + * 0: mad.l.f0 dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mad_more_one_component_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest =
dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_XW; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg src2 = src_reg(v, glsl_type::vec4_type); + src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; + src2.negate = true; + src_reg zero(0.0f); + src_reg tmp(dest); + tmp.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_f(); + + bld.MAD(dest, src0, src1, src2); + bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L); + + /* = Before = + * + * 0: mad dest.xw:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + * 1: cmp.l.f0.0 null:F dest.xxxx:F zeroF + * + * = After = + * (No changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_mov_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::ivec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::ivec4_type); + src0.swizzle = BRW_SWIZZLE_XXXX; + src0.file = UNIFORM; + src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D); + src_reg mov_src = src_reg(dest); + mov_src.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_d(); + dest_null.writemask = WRITEMASK_X; + + bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(dest_null, mov_src)); + + /* = Before = + * + * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D + * 1: mov.nz.f0 null.x:D dest.xxxx:D + * + * = After = + * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D + */ + +
v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + src_reg cmp_src = src_reg(dest); + cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2); + + bld.MUL(dest, src0, src1); + bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest src0 src1 + * 1: cmp.nz.f0.0 null dest.xywz 0.0f + * + * = After = + * (No changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index a049d9b8de7..cb854b81933 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -188,7 +188,7 @@ nouveau_context_init(struct gl_context *ctx, gl_api api, ctx->Extensions.EXT_blend_minmax = true; ctx->Extensions.EXT_texture_filter_anisotropic = true; ctx->Extensions.NV_texture_env_combine4 = true; - ctx->Const.MaxColorAttachments = 1; + 
ctx->Const.MaxDrawBuffers = ctx->Const.MaxColorAttachments = 1; /* This effectively disables 3D textures */ ctx->Const.Max3DTextureLevels = 1; diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index a46c1944e96..a49018953ae 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -698,16 +698,40 @@ valid_draw_indirect(struct gl_context *ctx, { const GLsizeiptr end = (GLsizeiptr)indirect + size; + /* OpenGL ES 3.1 spec. section 10.5: + * + * "DrawArraysIndirect requires that all data sourced for the + * command, including the DrawArraysIndirectCommand + * structure, be in buffer objects, and may not be called when + * the default vertex array object is bound." + */ + if (ctx->Array.VAO == ctx->Array.DefaultVAO) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no VAO bound)", name); + return GL_FALSE; + } + if (!_mesa_valid_prim_mode(ctx, mode, name)) return GL_FALSE; + /* OpenGL ES 3.1 specification, section 10.5: + * + * "An INVALID_OPERATION error is generated if + * transform feedback is active and not paused." + */ + if (_mesa_is_gles31(ctx) && _mesa_is_xfb_active_and_unpaused(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(TransformFeedback is active and not paused)", name); + return GL_FALSE; + } - /* From the ARB_draw_indirect specification: - * "An INVALID_OPERATION error is generated [...] if <indirect> is no - * word aligned." + /* From OpenGL version 4.4. section 10.5 + * and OpenGL ES 3.1, section 10.6: + * + * "An INVALID_VALUE error is generated if indirect is not a + * multiple of the size, in basic machine units, of uint."
*/ if ((GLsizeiptr)indirect & (sizeof(GLuint) - 1)) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is not aligned)", name); return GL_FALSE; } @@ -895,7 +918,12 @@ check_valid_to_compute(struct gl_context *ctx, const char *function) return false; } - prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE]; + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_OPERATION error is generated if there is no active program + * for the compute shader stage." + */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no active compute shader)", @@ -917,6 +945,24 @@ _mesa_validate_DispatchCompute(struct gl_context *ctx, return GL_FALSE; for (i = 0; i < 3; i++) { + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_VALUE error is generated if any of num_groups_x, + * num_groups_y and num_groups_z are greater than or equal to the + * maximum work group count for the corresponding dimension." + * + * However, the "or equal to" portions appears to be a specification + * bug. In all other areas, the specification appears to indicate that + * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT + * value. For example, under DispatchComputeIndirect: + * + * "If any of num_groups_x, num_groups_y or num_groups_z is greater than + * the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding + * dimension then the results are undefined." + * + * Additionally, the OpenGLES 3.1 specification does not contain "or + * equal to" as an error condition. 
+ */ if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) { _mesa_error(ctx, GL_INVALID_VALUE, "glDispatchCompute(num_groups_%c)", 'x' + i); @@ -937,24 +983,29 @@ valid_dispatch_indirect(struct gl_context *ctx, if (!check_valid_to_compute(ctx, name)) return GL_FALSE; - /* From the ARB_compute_shader specification: + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: * - * "An INVALID_OPERATION error is generated [...] if <indirect> is less - * than zero or not a multiple of the size, in basic machine units, of - * uint." + * "An INVALID_VALUE error is generated if indirect is negative or is not a + * multiple of four." */ if ((GLintptr)indirect & (sizeof(GLuint) - 1)) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is not aligned)", name); return GL_FALSE; } if ((GLintptr)indirect < 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is less than zero)", name); return GL_FALSE; } + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_OPERATION error is generated if no buffer is bound to the + * DRAW_INDIRECT_BUFFER binding, or if the command would source data + * beyond the end of the buffer object." 
+ */ if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name); @@ -967,11 +1018,6 @@ valid_dispatch_indirect(struct gl_context *ctx, return GL_FALSE; } - /* From the ARB_compute_shader specification: - * - * "An INVALID_OPERATION error is generated if this command sources data - * beyond the end of the buffer object [...]" - */ if (ctx->DispatchIndirectBuffer->Size < end) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(DISPATCH_INDIRECT_BUFFER too small)", name); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index b2c88c37366..d964f030ecb 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -152,6 +152,7 @@ static const struct extension extension_table[] = { { "GL_ARB_separate_shader_objects", o(dummy_true), GL, 2010 }, { "GL_ARB_shader_atomic_counters", o(ARB_shader_atomic_counters), GL, 2011 }, { "GL_ARB_shader_bit_encoding", o(ARB_shader_bit_encoding), GL, 2010 }, + { "GL_ARB_shader_clock", o(ARB_shader_clock), GL, 2015 }, { "GL_ARB_shader_image_load_store", o(ARB_shader_image_load_store), GL, 2011 }, { "GL_ARB_shader_image_size", o(ARB_shader_image_size), GL, 2012 }, { "GL_ARB_shader_objects", o(dummy_true), GL, 2002 }, @@ -229,6 +230,7 @@ static const struct extension extension_table[] = { { "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 }, { "GL_EXT_draw_buffers", o(dummy_true), ES2, 2012 }, { "GL_EXT_draw_buffers2", o(EXT_draw_buffers2), GL, 2006 }, + { "GL_EXT_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 }, { "GL_EXT_draw_instanced", o(ARB_draw_instanced), GL, 2006 }, { "GL_EXT_draw_range_elements", o(dummy_true), GLL, 1997 }, { "GL_EXT_fog_coord", o(dummy_true), GLL, 1999 }, @@ -305,6 +307,7 @@ static const struct extension extension_table[] = { { "GL_OES_depth32", o(dummy_false), DISABLE, 2005 }, { "GL_OES_depth_texture", o(ARB_depth_texture), ES2, 2006 }, { 
"GL_OES_depth_texture_cube_map", o(OES_depth_texture_cube_map), ES2, 2012 }, + { "GL_OES_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 }, { "GL_OES_draw_texture", o(OES_draw_texture), ES1, 2004 }, { "GL_OES_EGL_sync", o(dummy_true), ES1 | ES2, 2010 }, /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index c295615b475..fbc7b8f8602 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -460,6 +460,7 @@ descriptor=[ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ], [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ], + [ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ], # GL_ARB_framebuffer_no_attachments / GLES 3.1 ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c index c020fb3eb9e..93b80af0dc4 100644 --- a/src/mesa/main/lines.c +++ b/src/mesa/main/lines.c @@ -45,6 +45,10 @@ _mesa_LineWidth( GLfloat width ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glLineWidth %f\n", width); + /* If width is unchanged, there can't be an error */ + if (ctx->Line.Width == width) + return; + if (width <= 0.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" ); return; @@ -68,9 +72,6 @@ _mesa_LineWidth( GLfloat width ) return; } - if (ctx->Line.Width == width) - return; - FLUSH_VERTICES(ctx, _NEW_LINE); ctx->Line.Width = width; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index ab4fa083672..02dd257d79d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ 
-2292,6 +2292,7 @@ struct gl_shader struct exec_list *ir; struct exec_list *packed_varyings; + struct exec_list *fragdata_arrays; struct glsl_symbol_table *symbols; bool uses_builtin_functions; @@ -2389,6 +2390,9 @@ struct gl_shader */ GLuint NumImages; + struct gl_active_atomic_buffer **AtomicBuffers; + unsigned NumAtomicBuffers; + /** * Whether early fragment tests are enabled as defined by * ARB_shader_image_load_store. @@ -3680,6 +3684,7 @@ struct gl_extensions GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_atomic_counters; GLboolean ARB_shader_bit_encoding; + GLboolean ARB_shader_clock; GLboolean ARB_shader_image_load_store; GLboolean ARB_shader_image_size; GLboolean ARB_shader_precision; @@ -4501,7 +4506,7 @@ static inline bool _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) { return ctx->Shader._CurrentFragmentProgram != NULL && - ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; + ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0; } #ifdef __cplusplus diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 51ee10ff858..699a2ae47eb 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -230,6 +230,10 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) struct gl_shader_program *shProg = NULL; GLbitfield any_valid_stages; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glUseProgramStages(%u, 0x%x, %u)\n", + pipeline, stages, program); + if (!pipe) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUseProgramStages(pipeline)"); return; @@ -251,6 +255,8 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) if (_mesa_has_tessellation(ctx)) any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT; + if (_mesa_has_compute_shaders(ctx)) + any_valid_stages |= GL_COMPUTE_SHADER_BIT; if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) { 
_mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)"); @@ -332,6 +338,9 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0) _mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe); + + if ((stages & GL_COMPUTE_SHADER_BIT) != 0) + _mesa_use_shader_program(ctx, GL_COMPUTE_SHADER, shProg, pipe); } /** @@ -345,6 +354,9 @@ _mesa_ActiveShaderProgram(GLuint pipeline, GLuint program) struct gl_shader_program *shProg = NULL; struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glActiveShaderProgram(%u, %u)\n", pipeline, program); + if (program != 0) { shProg = _mesa_lookup_shader_program_err(ctx, program, "glActiveShaderProgram(program)"); @@ -380,6 +392,9 @@ _mesa_BindProgramPipeline(GLuint pipeline) GET_CURRENT_CONTEXT(ctx); struct gl_pipeline_object *newObj = NULL; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glBindProgramPipeline(%u)\n", pipeline); + /* Rebinding the same pipeline object: no change. 
*/ if (ctx->_Shader->Name == pipeline) @@ -467,6 +482,9 @@ _mesa_DeleteProgramPipelines(GLsizei n, const GLuint *pipelines) GET_CURRENT_CONTEXT(ctx); GLsizei i; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glDeleteProgramPipelines(%d, %p)\n", n, pipelines); + if (n < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteProgramPipelines(n<0)"); return; @@ -551,6 +569,9 @@ _mesa_GenProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGenProgramPipelines(%d, %p)\n", n, pipelines); + create_program_pipelines(ctx, n, pipelines, false); } @@ -559,6 +580,9 @@ _mesa_CreateProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glCreateProgramPipelines(%d, %p)\n", n, pipelines); + create_program_pipelines(ctx, n, pipelines, true); } @@ -574,6 +598,9 @@ _mesa_IsProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glIsProgramPipeline(%u)\n", pipeline); + struct gl_pipeline_object *obj = _mesa_lookup_pipeline_object(ctx, pipeline); if (obj == NULL) return GL_FALSE; @@ -590,6 +617,10 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) GET_CURRENT_CONTEXT(ctx); struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGetProgramPipelineiv(%u, %d, %p)\n", + pipeline, pname, params); + /* Are geometry shaders available in this context? */ const bool has_gs = _mesa_has_geometry_shaders(ctx); @@ -643,6 +674,12 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) *params = pipe->CurrentProgram[MESA_SHADER_FRAGMENT] ? pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name : 0; return; + case GL_COMPUTE_SHADER: + if (!_mesa_has_compute_shaders(ctx)) + break; + *params = pipe->CurrentProgram[MESA_SHADER_COMPUTE] + ? 
pipe->CurrentProgram[MESA_SHADER_COMPUTE]->Name : 0; + return; default: break; } @@ -857,6 +894,9 @@ _mesa_ValidateProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glValidateProgramPipeline(%u)\n", pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { @@ -875,6 +915,10 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGetProgramPipelineInfoLog(%u, %d, %p, %p)\n", + pipeline, bufSize, length, infoLog); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index eb71fdde703..b7e25fe3840 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -119,7 +119,6 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, case GL_MAX_NUM_ACTIVE_VARIABLES: switch (programInterface) { case GL_UNIFORM_BLOCK: - case GL_SHADER_STORAGE_BLOCK: for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { if (shProg->ProgramResourceList[i].Type == programInterface) { struct gl_uniform_block *block = @@ -129,6 +128,26 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, } } break; + case GL_SHADER_STORAGE_BLOCK: + for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { + if (shProg->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_block *block = + (struct gl_uniform_block *) + shProg->ProgramResourceList[i].Data; + GLint block_params = 0; + for (unsigned j = 0; j < block->NumUniforms; j++) { + const char *iname = block->Uniforms[j].IndexName; + struct gl_program_resource *uni = + _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE, + iname, NULL); + if (!uni) + continue; + block_params++; + } + *params = MAX2(*params, block_params); + } + } 
+ break; case GL_ATOMIC_COUNTER_BUFFER: for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { if (shProg->ProgramResourceList[i].Type == programInterface) { diff --git a/src/mesa/main/rastpos.c b/src/mesa/main/rastpos.c index 54b2125a80f..b468219e688 100644 --- a/src/mesa/main/rastpos.c +++ b/src/mesa/main/rastpos.c @@ -36,6 +36,447 @@ #include "rastpos.h" #include "state.h" #include "main/dispatch.h" +#include "main/viewport.h" +#include "util/simple_list.h" + + + +/** + * Clip a point against the view volume. + * + * \param v vertex vector describing the point to clip. + * + * \return zero if outside view volume, or one if inside. + */ +static GLuint +viewclip_point_xy( const GLfloat v[] ) +{ + if ( v[0] > v[3] || v[0] < -v[3] + || v[1] > v[3] || v[1] < -v[3] ) { + return 0; + } + else { + return 1; + } +} + + +/** + * Clip a point against the far/near Z clipping planes. + * + * \param v vertex vector describing the point to clip. + * + * \return zero if outside view volume, or one if inside. + */ +static GLuint +viewclip_point_z( const GLfloat v[] ) +{ + if (v[2] > v[3] || v[2] < -v[3] ) { + return 0; + } + else { + return 1; + } +} + + +/** + * Clip a point against the user clipping planes. + * + * \param ctx GL context. + * \param v vertex vector describing the point to clip. + * + * \return zero if the point was clipped, or one otherwise. + */ +static GLuint +userclip_point( struct gl_context *ctx, const GLfloat v[] ) +{ + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0] + + v[1] * ctx->Transform._ClipUserPlane[p][1] + + v[2] * ctx->Transform._ClipUserPlane[p][2] + + v[3] * ctx->Transform._ClipUserPlane[p][3]; + if (dot < 0.0F) { + return 0; + } + } + } + + return 1; +} + + +/** + * Compute lighting for the raster position. RGB modes computed. 
+ * \param ctx the context + * \param vertex vertex location + * \param normal normal vector + * \param Rcolor returned color + * \param Rspec returned specular color (if separate specular enabled) + */ +static void +shade_rastpos(struct gl_context *ctx, + const GLfloat vertex[4], + const GLfloat normal[3], + GLfloat Rcolor[4], + GLfloat Rspec[4]) +{ + /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor; + const struct gl_light *light; + GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */ + + COPY_3V(diffuseColor, base[0]); + diffuseColor[3] = CLAMP( + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F ); + ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0); + + foreach (light, &ctx->Light.EnabledList) { + GLfloat attenuation = 1.0; + GLfloat VP[3]; /* vector from vertex to light pos */ + GLfloat n_dot_VP; + GLfloat diffuseContrib[3], specularContrib[3]; + + if (!(light->_Flags & LIGHT_POSITIONAL)) { + /* light at infinity */ + COPY_3V(VP, light->_VP_inf_norm); + attenuation = light->_VP_inf_spot_attenuation; + } + else { + /* local/positional light */ + GLfloat d; + + /* VP = vector from vertex pos to light[i].pos */ + SUB_3V(VP, light->_Position, vertex); + /* d = length(VP) */ + d = (GLfloat) LEN_3FV( VP ); + if (d > 1.0e-6F) { + /* normalize VP */ + GLfloat invd = 1.0F / d; + SELF_SCALE_SCALAR_3V(VP, invd); + } + + /* atti */ + attenuation = 1.0F / (light->ConstantAttenuation + d * + (light->LinearAttenuation + d * + light->QuadraticAttenuation)); + + if (light->_Flags & LIGHT_SPOT) { + GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection); + + if (PV_dot_dir<light->_CosCutoff) { + continue; + } + else { + GLfloat spot = powf(PV_dot_dir, light->SpotExponent); + attenuation *= spot; + } + } + } + + if (attenuation < 1e-3F) + continue; + + n_dot_VP = DOT3( normal, VP ); + + if (n_dot_VP < 0.0F) { + ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]); + continue; + } + + /* Ambient + diffuse */ + 
COPY_3V(diffuseContrib, light->_MatAmbient[0]); + ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]); + + /* Specular */ + { + const GLfloat *h; + GLfloat n_dot_h; + + ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0); + + if (ctx->Light.Model.LocalViewer) { + GLfloat v[3]; + COPY_3V(v, vertex); + NORMALIZE_3FV(v); + SUB_3V(VP, VP, v); + NORMALIZE_3FV(VP); + h = VP; + } + else if (light->_Flags & LIGHT_POSITIONAL) { + ACC_3V(VP, ctx->_EyeZDir); + NORMALIZE_3FV(VP); + h = VP; + } + else { + h = light->_h_inf_norm; + } + + n_dot_h = DOT3(normal, h); + + if (n_dot_h > 0.0F) { + GLfloat shine; + GLfloat spec_coef; + + shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; + spec_coef = powf(n_dot_h, shine); + + if (spec_coef > 1.0e-10F) { + if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) { + ACC_SCALE_SCALAR_3V( specularContrib, spec_coef, + light->_MatSpecular[0]); + } + else { + ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef, + light->_MatSpecular[0]); + } + } + } + } + + ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib ); + ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib ); + } + + Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F); + Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F); + Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F); + Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F); + Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F); + Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F); + Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F); + Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F); +} + + +/** + * Do texgen needed for glRasterPos. 
+ * \param ctx rendering context + * \param vObj object-space vertex coordinate + * \param vEye eye-space vertex coordinate + * \param normal vertex normal + * \param unit texture unit number + * \param texcoord incoming texcoord and resulting texcoord + */ +static void +compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4], + const GLfloat normal[3], GLuint unit, GLfloat texcoord[4]) +{ + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + /* always compute sphere map terms, just in case */ + GLfloat u[3], two_nu, rx, ry, rz, m, mInv; + COPY_3V(u, vEye); + NORMALIZE_3FV(u); + two_nu = 2.0F * DOT3(normal, u); + rx = u[0] - normal[0] * two_nu; + ry = u[1] - normal[1] * two_nu; + rz = u[2] - normal[2] * two_nu; + m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F); + if (m > 0.0F) + mInv = 0.5F * (1.0f / sqrtf(m)); + else + mInv = 0.0F; + + if (texUnit->TexGenEnabled & S_BIT) { + switch (texUnit->GenS.Mode) { + case GL_OBJECT_LINEAR: + texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane); + break; + case GL_SPHERE_MAP: + texcoord[0] = rx * mInv + 0.5F; + break; + case GL_REFLECTION_MAP: + texcoord[0] = rx; + break; + case GL_NORMAL_MAP: + texcoord[0] = normal[0]; + break; + default: + _mesa_problem(ctx, "Bad S texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & T_BIT) { + switch (texUnit->GenT.Mode) { + case GL_OBJECT_LINEAR: + texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane); + break; + case GL_SPHERE_MAP: + texcoord[1] = ry * mInv + 0.5F; + break; + case GL_REFLECTION_MAP: + texcoord[1] = ry; + break; + case GL_NORMAL_MAP: + texcoord[1] = normal[1]; + break; + default: + _mesa_problem(ctx, "Bad T texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & R_BIT) { + switch (texUnit->GenR.Mode) { + case 
GL_OBJECT_LINEAR: + texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane); + break; + case GL_REFLECTION_MAP: + texcoord[2] = rz; + break; + case GL_NORMAL_MAP: + texcoord[2] = normal[2]; + break; + default: + _mesa_problem(ctx, "Bad R texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & Q_BIT) { + switch (texUnit->GenQ.Mode) { + case GL_OBJECT_LINEAR: + texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane); + break; + default: + _mesa_problem(ctx, "Bad Q texgen in compute_texgen()"); + return; + } + } +} + + +/** + * glRasterPos transformation. Typically called via ctx->Driver.RasterPos(). + * + * \param vObj vertex position in object space + */ +void +_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) +{ + if (ctx->VertexProgram._Enabled) { + /* XXX implement this */ + _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos"); + return; + } + else { + GLfloat eye[4], clip[4], ndc[3], d; + GLfloat *norm, eyenorm[3]; + GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; + float scale[3], translate[3]; + + /* apply modelview matrix: eye = MV * obj */ + TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); + /* apply projection matrix: clip = Proj * eye */ + TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye ); + + /* clip to view volume. 
*/ + if (!ctx->Transform.DepthClamp) { + if (viewclip_point_z(clip) == 0) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + } + if (!ctx->Transform.RasterPositionUnclipped) { + if (viewclip_point_xy(clip) == 0) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + } + + /* clip to user clipping planes */ + if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + + /* ndc = clip / W */ + d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3]; + ndc[0] = clip[0] * d; + ndc[1] = clip[1] * d; + ndc[2] = clip[2] * d; + /* wincoord = viewport_mapping(ndc) */ + _mesa_get_viewport_xform(ctx, 0, scale, translate); + ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0]; + ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1]; + ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2]; + ctx->Current.RasterPos[3] = clip[3]; + + if (ctx->Transform.DepthClamp) { + ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3], + ctx->ViewportArray[0].Near, + ctx->ViewportArray[0].Far); + } + + /* compute raster distance */ + if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT) + ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0]; + else + ctx->Current.RasterDistance = + sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] ); + + /* compute transformed normal vector (for lighting or texgen) */ + if (ctx->_NeedEyeCoords) { + const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv; + TRANSFORM_NORMAL( eyenorm, objnorm, inv ); + norm = eyenorm; + } + else { + norm = objnorm; + } + + /* update raster color */ + if (ctx->Light.Enabled) { + /* lighting */ + shade_rastpos( ctx, vObj, norm, + ctx->Current.RasterColor, + ctx->Current.RasterSecondaryColor ); + } + else { + /* use current color */ + COPY_4FV(ctx->Current.RasterColor, + ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); + COPY_4FV(ctx->Current.RasterSecondaryColor, + ctx->Current.Attrib[VERT_ATTRIB_COLOR1]); + } + + 
/* texture coords */ + { + GLuint u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + GLfloat tc[4]; + COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]); + if (ctx->Texture.Unit[u].TexGenEnabled) { + compute_texgen(ctx, vObj, eye, norm, u, tc); + } + TRANSFORM_POINT(ctx->Current.RasterTexCoords[u], + ctx->TextureMatrixStack[u].Top->m, tc); + } + } + + ctx->Current.RasterPosValid = GL_TRUE; + } + + if (ctx->RenderMode == GL_SELECT) { + _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] ); + } +} /** diff --git a/src/mesa/main/rastpos.h b/src/mesa/main/rastpos.h index dc28c68d41b..90b8f957b9f 100644 --- a/src/mesa/main/rastpos.h +++ b/src/mesa/main/rastpos.h @@ -41,6 +41,9 @@ struct gl_context; extern void _mesa_init_rastpos(struct gl_context *ctx); +void +_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]); + void GLAPIENTRY _mesa_RasterPos2d(GLdouble x, GLdouble y); void GLAPIENTRY diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 8182d3dcc04..dd51bba3386 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -543,13 +543,55 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg, /* Resource basename. */ const char *rname = _mesa_program_resource_name(res); unsigned baselen = strlen(rname); + unsigned baselen_without_array_index = baselen; + const char *rname_last_square_bracket = strrchr(rname, '['); + bool found = false; + bool rname_has_array_index_zero = false; + /* From ARB_program_interface_query spec: + * + * "uint GetProgramResourceIndex(uint program, enum programInterface, + * const char *name); + * [...] + * If <name> exactly matches the name string of one of the active + * resources for <programInterface>, the index of the matched resource is + * returned. Additionally, if <name> would exactly match the name string + * of an active resource if "[0]" were appended to <name>, the index of + * the matched resource is returned. 
[...]" + * + * "A string provided to GetProgramResourceLocation or + * GetProgramResourceLocationIndex is considered to match an active variable + * if: + * + * * the string exactly matches the name of the active variable; + * + * * if the string identifies the base name of an active array, where the + * string would exactly match the name of the variable if the suffix + * "[0]" were appended to the string; [...]" + */ + /* Remove array's index from interface block name comparison only if + * array's index is zero and the resulting string length is the same + * than the provided name's length. + */ + if (rname_last_square_bracket) { + baselen_without_array_index -= strlen(rname_last_square_bracket); + rname_has_array_index_zero = + (strncmp(rname_last_square_bracket, "[0]\0", 4) == 0) && + (baselen_without_array_index == strlen(name)); + } + + if (strncmp(rname, name, baselen) == 0) + found = true; + else if (rname_has_array_index_zero && + strncmp(rname, name, baselen_without_array_index) == 0) + found = true; - if (strncmp(rname, name, baselen) == 0) { + if (found) { switch (programInterface) { case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: /* Basename match, check if array or struct. */ - if (name[baselen] == '\0' || + if (rname_has_array_index_zero || + name[baselen] == '\0' || name[baselen] == '[' || name[baselen] == '.') { return res; @@ -627,6 +669,20 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, } } +/** + * Find a program resource that points to given data. + */ +static struct gl_program_resource* +program_resource_find_data(struct gl_shader_program *shProg, void *data) +{ + struct gl_program_resource *res = shProg->ProgramResourceList; + for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { + if (res->Data == data) + return res; + } + return NULL; +} + /* Find a program resource with specific index in given interface. 
*/ struct gl_program_resource * @@ -808,6 +864,14 @@ program_resource_location(struct gl_shader_program *shProg, if (RESOURCE_UNI(res)->builtin) return -1; + /* From page 79 of the OpenGL 4.2 spec: + * + * "A valid name cannot be a structure, an array of structures, or any + * portion of a single vector or a matrix." + */ + if (RESOURCE_UNI(res)->type->without_array()->is_record()) + return -1; + /* From the GL_ARB_uniform_buffer_object spec: * * "The value -1 will be returned if <name> does not correspond to an @@ -1016,8 +1080,18 @@ get_buffer_property(struct gl_shader_program *shProg, *val = RESOURCE_ATC(res)->NumUniforms; return 1; case GL_ACTIVE_VARIABLES: - for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++) - *val++ = RESOURCE_ATC(res)->Uniforms[i]; + for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++) { + /* Active atomic buffer contains index to UniformStorage. Find + * out gl_program_resource via data pointer and then calculate + * index of that uniform. + */ + unsigned idx = RESOURCE_ATC(res)->Uniforms[i]; + struct gl_program_resource *uni = + program_resource_find_data(shProg, + &shProg->UniformStorage[idx]); + assert(uni); + *val++ = _mesa_program_resource_index(shProg, uni); + } return RESOURCE_ATC(res)->NumUniforms; } } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 765602e50db..ac40891f435 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -630,9 +630,16 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, case GL_ACTIVE_ATTRIBUTE_MAX_LENGTH: *params = _mesa_longest_attribute_name_length(shProg); return; - case GL_ACTIVE_UNIFORMS: - *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms; + case GL_ACTIVE_UNIFORMS: { + unsigned i; + const unsigned num_uniforms = + shProg->NumUniformStorage - shProg->NumHiddenUniforms; + for (*params = 0, i = 0; i < num_uniforms; i++) { + if (!shProg->UniformStorage[i].is_shader_storage) + (*params)++; + } return; + } case 
GL_ACTIVE_UNIFORM_MAX_LENGTH: { unsigned i; GLint max_len = 0; @@ -640,6 +647,9 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, shProg->NumUniformStorage - shProg->NumHiddenUniforms; for (i = 0; i < num_uniforms; i++) { + if (shProg->UniformStorage[i].is_shader_storage) + continue; + /* Add one for the terminating NUL character for a non-array, and * 4 for the "[0]" and the NUL for an array. */ diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 84973d3fe5d..a8ac19e40d7 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -243,28 +243,6 @@ _mesa_gl_compressed_format_base_format(GLenum format) * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and * GL_COMPRESSED_TEXTURE_FORMATS return." * - * The KHR_texture_compression_astc_hdr spec says: - * - * "Interactions with OpenGL 4.2 - * - * OpenGL 4.2 supports the feature that compressed textures can be - * compressed online, by passing the compressed texture format enum as - * the internal format when uploading a texture using TexImage1D, - * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image - * Specification, subsection Encoding of Special Internal Formats). - * - * Due to the complexity of the ASTC compression algorithm, it is not - * usually suitable for online use, and therefore ASTC support will be - * limited to pre-compressed textures only. Where on-device compression - * is required, a domain-specific limited compressor will typically - * be used, and this is therefore not suitable for implementation in - * the driver. - * - * In particular, the ASTC format specifiers will not be added to - * Table 3.14, and thus will not be accepted by the TexImage*D - * functions, and will not be returned by the (already deprecated) - * COMPRESSED_TEXTURE_FORMATS query." - * * There is no formal spec for GL_ATI_texture_compression_3dc. 
Since the * formats added by this extension are luminance-alpha formats, it is * reasonable to expect them to follow the same rules as @@ -286,7 +264,8 @@ GLuint _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; - if (ctx->Extensions.TDFX_texture_compression_FXT1) { + if (_mesa_is_desktop_gl(ctx) && + ctx->Extensions.TDFX_texture_compression_FXT1) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_FXT1_3DFX; formats[n++] = GL_COMPRESSED_RGBA_FXT1_3DFX; @@ -396,6 +375,69 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) n += 10; } } + + /* The KHR_texture_compression_astc_hdr spec says: + * + * "Interactions with OpenGL 4.2 + * + * OpenGL 4.2 supports the feature that compressed textures can be + * compressed online, by passing the compressed texture format enum as + * the internal format when uploading a texture using TexImage1D, + * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image + * Specification, subsection Encoding of Special Internal Formats). + * + * Due to the complexity of the ASTC compression algorithm, it is not + * usually suitable for online use, and therefore ASTC support will be + * limited to pre-compressed textures only. Where on-device compression + * is required, a domain-specific limited compressor will typically + * be used, and this is therefore not suitable for implementation in + * the driver. + * + * In particular, the ASTC format specifiers will not be added to + * Table 3.14, and thus will not be accepted by the TexImage*D + * functions, and will not be returned by the (already deprecated) + * COMPRESSED_TEXTURE_FORMATS query." + * + * The ES and the desktop specs diverge here. In OpenGL ES, the COMPRESSED_TEXTURE_FORMATS + * query returns the set of supported specific compressed formats. 
+ */ + if (ctx->API == API_OPENGLES2 && + ctx->Extensions.KHR_texture_compression_astc_ldr) { + if (formats) { + formats[n++] = GL_COMPRESSED_RGBA_ASTC_4x4_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x4_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x8_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x8_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x10_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x10_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x12_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR; + } + else { + n += 28; + } + } + return n; } diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 682b72755c7..945890aeeb5 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -297,8 +297,7 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, uint8_t rebaseSwizzle[4]; /* Decompress into 
temp float buffer, then pack into user buffer */ - tempImage = malloc(width * height * depth - * 4 * sizeof(GLfloat)); + tempImage = malloc(width * height * depth * 4 * sizeof(GLfloat)); if (!tempImage) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()"); return; diff --git a/src/mesa/main/vdpau.c b/src/mesa/main/vdpau.c index 0efa56e4f41..44be3a37443 100644 --- a/src/mesa/main/vdpau.c +++ b/src/mesa/main/vdpau.c @@ -163,9 +163,10 @@ register_surface(struct gl_context *ctx, GLboolean isOutput, return (GLintptr)NULL; } - if (tex->Target == 0) + if (tex->Target == 0) { tex->Target = target; - else if (tex->Target != target) { + tex->TargetIndex = _mesa_tex_target_to_index(ctx, target); + } else if (tex->Target != target) { _mesa_unlock_texture(ctx, tex); free(surf); _mesa_error(ctx, GL_INVALID_OPERATION, diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index acaa85d9356..20f8b3df99d 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -73,7 +73,8 @@ void st_upload_constants( struct st_context *st, * the parameters list are explicitly set by the user with glUniform, * glProgramParameter(), etc. */ - _mesa_load_state_parameters(st->ctx, params); + if (params->StateFlags) + _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c new file mode 100644 index 00000000000..75114cdb712 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_copyimage.c @@ -0,0 +1,582 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_copyimage.h" +#include "state_tracker/st_cb_fbo.h" +#include "state_tracker/st_texture.h" + +#include "util/u_box.h" +#include "util/u_format.h" +#include "util/u_inlines.h" + + +/** + * Return an equivalent canonical format without "X" channels. + * + * Copying between incompatible formats is easier when the format is + * canonicalized, meaning that it is in a standard form. + * + * The returned format has the same component sizes and swizzles as + * the source format, the type is changed to UINT or UNORM, depending on + * which one has the most swizzle combinations in their group. + * + * If it's not an array format, return a memcpy-equivalent array format. 
+ * + * The key feature is that swizzled versions of formats of the same + * component size always return the same component type. + * + * X returns A. + * Luminance, intensity, alpha, depth, stencil, and 8-bit and 16-bit packed + * formats are not supported. (same as ARB_copy_image) + */ +static enum pipe_format +get_canonical_format(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + /* Packed formats. Return the equivalent array format. */ + if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + + if (desc->nr_channels == 4 && + desc->channel[0].size == 10 && + desc->channel[1].size == 10 && + desc->channel[2].size == 10 && + desc->channel[3].size == 2) { + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + + return PIPE_FORMAT_NONE; + } + +#define RETURN_FOR_SWIZZLE1(x, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x) \ + return format + +#define RETURN_FOR_SWIZZLE2(x, y, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y) \ + return format + +#define RETURN_FOR_SWIZZLE3(x, y, z, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \ + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z) \ + return format + +#define RETURN_FOR_SWIZZLE4(x, y, z, w, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \ + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z && \ + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_##w) \ + return format + + /* Array formats. 
*/ + if (desc->is_array) { + switch (desc->nr_channels) { + case 1: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R8_UINT); + break; + + case 16: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R32_UINT); + break; + } + break; + + case 2: + switch (desc->channel[0].size) { + case 8: + /* All formats in each group must be of the same type. + * We can't use UINT for R8G8 while using UNORM for G8R8. + */ + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R8G8_UNORM); + RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G8R8_UNORM); + break; + + case 16: + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R16G16_UNORM); + RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G16R16_UNORM); + break; + + case 32: + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R32G32_UINT); + break; + } + break; + + case 3: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R8G8B8_UINT); + break; + + case 16: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R16G16B16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R32G32B32_UINT); + break; + } + break; + + case 4: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R8G8B8A8_UNORM); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R8G8B8A8_UNORM); + RETURN_FOR_SWIZZLE4(Z, Y, X, W, PIPE_FORMAT_B8G8R8A8_UNORM); + RETURN_FOR_SWIZZLE4(Z, Y, X, 1, PIPE_FORMAT_B8G8R8A8_UNORM); + RETURN_FOR_SWIZZLE4(W, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM); + RETURN_FOR_SWIZZLE4(1, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM); + RETURN_FOR_SWIZZLE4(W, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM); + RETURN_FOR_SWIZZLE4(1, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM); + break; + + case 16: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R16G16B16A16_UINT); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R16G16B16A16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R32G32B32A32_UINT); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, 
PIPE_FORMAT_R32G32B32A32_UINT); + break; + } + } + + assert(!"unknown array format"); + return PIPE_FORMAT_NONE; + } + + assert(!"unknown packed format"); + return PIPE_FORMAT_NONE; +} + +/** + * Return true if the swizzle is XYZW in case of a 4-channel format, + * XY in case of a 2-channel format, or X in case of a 1-channel format. + */ +static bool +has_identity_swizzle(const struct util_format_description *desc) +{ + int i; + + for (i = 0; i < desc->nr_channels; i++) + if (desc->swizzle[i] != UTIL_FORMAT_SWIZZLE_X + i) + return false; + + return true; +} + +/** + * Return a canonical format for the given bits and channel size. + */ +static enum pipe_format +canonical_format_from_bits(unsigned bits, unsigned channel_size) +{ + switch (bits) { + case 8: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8_UINT); + break; + + case 16: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8G8_UINT); + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16_UINT); + break; + + case 32: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16G16_UINT); + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32_UINT); + break; + + case 64: + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16G16B16A16_UINT); + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32G32_UINT); + break; + + case 128: + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32G32B32A32_UINT); + break; + } + + assert(!"impossible format"); + return PIPE_FORMAT_NONE; +} + +static void +blit(struct pipe_context *pipe, + struct pipe_resource *dst, + enum pipe_format dst_format, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + enum pipe_format src_format, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_blit_info blit = 
{{0}}; + + blit.src.resource = src; + blit.dst.resource = dst; + blit.src.format = src_format; + blit.dst.format = dst_format; + blit.src.level = src_level; + blit.dst.level = dst_level; + blit.src.box = *src_box; + u_box_3d(dstx, dsty, dstz, src_box->width, src_box->height, + src_box->depth, &blit.dst.box); + blit.mask = PIPE_MASK_RGBA; + blit.filter = PIPE_TEX_FILTER_NEAREST; + + pipe->blit(pipe, &blit); +} + +static void +swizzled_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + const struct util_format_description *src_desc, *dst_desc; + unsigned bits; + enum pipe_format blit_src_format, blit_dst_format; + + /* Get equivalent canonical formats. Those are always array formats and + * copying between compatible canonical formats behaves either like + * memcpy or like swizzled memcpy. The idea is that we won't have to care + * about the channel type from this point on. + * Only the swizzle and channel size. + */ + blit_src_format = get_canonical_format(src->format); + blit_dst_format = get_canonical_format(dst->format); + + assert(blit_src_format != PIPE_FORMAT_NONE); + assert(blit_dst_format != PIPE_FORMAT_NONE); + + src_desc = util_format_description(blit_src_format); + dst_desc = util_format_description(blit_dst_format); + + assert(src_desc->block.bits == dst_desc->block.bits); + bits = src_desc->block.bits; + + if (dst_desc->channel[0].size == src_desc->channel[0].size) { + /* Only the swizzle is different, which means we can just blit, + * e.g. RGBA -> BGRA. + */ + } else if (has_identity_swizzle(src_desc)) { + /* Src is unswizzled and dst can be swizzled, so src is typecast + * to an equivalent dst-compatible format. + * e.g. 
R32 -> BGRA8 is realized as RGBA8 -> BGRA8 + */ + blit_src_format = + canonical_format_from_bits(bits, dst_desc->channel[0].size); + } else if (has_identity_swizzle(dst_desc)) { + /* Dst is unswizzled and src can be swizzled, so dst is typecast + * to an equivalent src-compatible format. + * e.g. BGRA8 -> R32 is realized as BGRA8 -> RGBA8 + */ + blit_dst_format = + canonical_format_from_bits(bits, src_desc->channel[0].size); + } else { + assert(!"This should have been handled by handle_complex_copy."); + return; + } + + blit(pipe, dst, blit_dst_format, dst_level, dstx, dsty, dstz, + src, blit_src_format, src_level, src_box); +} + +static bool +same_size_and_swizzle(const struct util_format_description *d1, + const struct util_format_description *d2) +{ + int i; + + if (d1->layout != d2->layout || + d1->nr_channels != d2->nr_channels || + d1->is_array != d2->is_array) + return false; + + for (i = 0; i < d1->nr_channels; i++) { + if (d1->channel[i].size != d2->channel[i].size) + return false; + + if (d1->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W && + d2->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W && + d1->swizzle[i] != d2->swizzle[i]) + return false; + } + + return true; +} + +static struct pipe_resource * +create_texture(struct pipe_screen *screen, enum pipe_format format, + unsigned nr_samples, + unsigned width, unsigned height, unsigned depth) +{ + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + templ.format = format; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; + templ.array_size = depth; + templ.nr_samples = nr_samples; + templ.usage = PIPE_USAGE_DEFAULT; + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + + if (depth > 1) + templ.target = PIPE_TEXTURE_2D_ARRAY; + else + templ.target = PIPE_TEXTURE_2D; + + return screen->resource_create(screen, &templ); +} + +/** + * Handle complex format conversions using 2 blits with a temporary texture + * in between, e.g. blitting from B10G10R10A2 to G16R16. 
+ * + * This example is implemented this way: + * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it + * can be reinterpreted as a different canonical format of the same bpp, + * such as R16G16. This blit only swaps R and B 10-bit components. + * 2) Finally, blit the result, which is R10G10B10A2, as R16G16 to G16R16. + * This blit only swaps R and G 16-bit components. + */ +static bool +handle_complex_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box, + enum pipe_format noncanon_format, + enum pipe_format canon_format) +{ + struct pipe_box temp_box; + struct pipe_resource *temp = NULL; + const struct util_format_description *src_desc, *dst_desc; + const struct util_format_description *canon_desc, *noncanon_desc; + bool src_is_canon; + bool src_is_noncanon; + bool dst_is_canon; + bool dst_is_noncanon; + + src_desc = util_format_description(src->format); + dst_desc = util_format_description(dst->format); + canon_desc = util_format_description(canon_format); + noncanon_desc = util_format_description(noncanon_format); + + src_is_canon = same_size_and_swizzle(src_desc, canon_desc); + dst_is_canon = same_size_and_swizzle(dst_desc, canon_desc); + src_is_noncanon = same_size_and_swizzle(src_desc, noncanon_desc); + dst_is_noncanon = same_size_and_swizzle(dst_desc, noncanon_desc); + + if (src_is_noncanon) { + /* Simple case - only types differ (e.g. UNORM and UINT). */ + if (dst_is_noncanon) { + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src, + noncanon_format, src_level, src_box); + return true; + } + + /* Simple case - only types and swizzles differ. */ + if (dst_is_canon) { + blit(pipe, dst, canon_format, dst_level, dstx, dsty, dstz, src, + noncanon_format, src_level, src_box); + return true; + } + + /* Use the temporary texture. 
Src is converted to a canonical format, + * then proceed with the generic swizzled_copy. + */ + temp = create_texture(pipe->screen, canon_format, src->nr_samples, + src_box->width, + src_box->height, src_box->depth); + + u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth, + &temp_box); + + blit(pipe, temp, canon_format, 0, 0, 0, 0, src, noncanon_format, + src_level, src_box); + swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, temp, 0, + &temp_box); + pipe_resource_reference(&temp, NULL); + return true; + } + + if (dst_is_noncanon) { + /* Simple case - only types and swizzles differ. */ + if (src_is_canon) { + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src, + canon_format, src_level, src_box); + return true; + } + + /* Use the temporary texture. First, use the generic copy, but use + * a canonical format in the destination. Then convert it to the non-canonical destination format. */ + temp = create_texture(pipe->screen, canon_format, dst->nr_samples, + src_box->width, + src_box->height, src_box->depth); + + u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth, + &temp_box); + + swizzled_copy(pipe, temp, 0, 0, 0, 0, src, src_level, src_box); + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, temp, + canon_format, 0, &temp_box); + pipe_resource_reference(&temp, NULL); + return true; + } + + return false; +} + +static void +copy_image(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + if (src->format == dst->format || + util_format_is_compressed(src->format) || + util_format_is_compressed(dst->format)) { + pipe->resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + + /* Copying to/from B10G10R10*2 needs 2 blits with R10G10B10A2 + * as a temporary texture in between.
+ */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_B10G10R10A2_UINT, + PIPE_FORMAT_R10G10B10A2_UINT)) + return; + + /* Copying to/from G8R8 needs 2 blits with R8G8 as a temporary texture + * in between. + */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_G8R8_UNORM, + PIPE_FORMAT_R8G8_UNORM)) + return; + + /* Copying to/from G16R16 needs 2 blits with R16G16 as a temporary texture + * in between. + */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_G16R16_UNORM, + PIPE_FORMAT_R16G16_UNORM)) + return; + + /* Only allow non-identity swizzling on RGBA8 formats. */ + + /* Simple copy, memcpy with swizzling, no format conversion. */ + swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, + src_box); +} + +static void +st_CopyImageSubData(struct gl_context *ctx, + struct gl_texture_image *src_image, + struct gl_renderbuffer *src_renderbuffer, + int src_x, int src_y, int src_z, + struct gl_texture_image *dst_image, + struct gl_renderbuffer *dst_renderbuffer, + int dst_x, int dst_y, int dst_z, + int src_width, int src_height) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *src_res, *dst_res; + struct pipe_box box; + int src_level, dst_level; + + if (src_image) { + struct st_texture_image *src = st_texture_image(src_image); + src_res = src->pt; + src_level = src_image->Level; + src_z += src_image->Face; + } else { + struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer); + src_res = src->texture; + src_level = 0; + } + + if (dst_image) { + struct st_texture_image *dst = st_texture_image(dst_image); + dst_res = dst->pt; + dst_level = dst_image->Level; + dst_z += dst_image->Face; + } else { + struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer); + dst_res = dst->texture; + dst_level = 0; + } + + 
u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box); + + copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z, + src_res, src_level, &box); +} + +void +st_init_copy_image_functions(struct dd_function_table *functions) +{ + functions->CopyImageSubData = st_CopyImageSubData; +} diff --git a/src/mesa/state_tracker/st_cb_copyimage.h b/src/mesa/state_tracker/st_cb_copyimage.h new file mode 100644 index 00000000000..d17f35c0953 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_copyimage.h @@ -0,0 +1,33 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef ST_CB_COPY_IMAGE_H +#define ST_CB_COPY_IMAGE_H + +struct dd_function_table; + +extern void +st_init_copy_image_functions(struct dd_function_table *functions); + +#endif /* ST_CB_COPY_IMAGE_H */ diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index b9997dacfd2..747b41464ae 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -39,6 +39,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/feedback.h" +#include "main/rastpos.h" #include "st_context.h" #include "st_atom.h" @@ -224,6 +225,15 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4]) struct rastpos_stage *rs; const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays; + if (ctx->VertexProgram._Current == NULL || + ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) { + /* No vertex shader/program is enabled, use the simple/fast fixed- + * function implementation of RasterPos. + */ + _mesa_RasterPos(ctx, v); + return; + } + if (st->rastpos_stage) { /* get rastpos stage info */ rs = rastpos_stage(st->rastpos_stage); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 5d25fed317e..d4c916e8057 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1873,55 +1873,6 @@ st_TextureView(struct gl_context *ctx, return GL_TRUE; } -/* HACK: this is only enough for the most basic uses of CopyImage. Must fix - * before actually exposing the extension.
- */ -static void -st_CopyImageSubData(struct gl_context *ctx, - struct gl_texture_image *src_image, - struct gl_renderbuffer *src_renderbuffer, - int src_x, int src_y, int src_z, - struct gl_texture_image *dst_image, - struct gl_renderbuffer *dst_renderbuffer, - int dst_x, int dst_y, int dst_z, - int src_width, int src_height) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *src_res, *dst_res; - struct pipe_box box; - int src_level, dst_level; - - if (src_image) { - struct st_texture_image *src = st_texture_image(src_image); - src_res = src->pt; - src_level = src_image->Level; - } - else { - struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer); - src_res = src->texture; - src_level = 0; - } - - if (dst_image) { - struct st_texture_image *dst = st_texture_image(dst_image); - dst_res = dst->pt; - dst_level = dst_image->Level; - } - else { - struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer); - dst_res = dst->texture; - dst_level = 0; - } - - u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box); - pipe->resource_copy_region(pipe, dst_res, dst_level, - dst_x, dst_y, dst_z, - src_res, src_level, - &box); -} - - void st_init_texture_functions(struct dd_function_table *functions) { @@ -1953,6 +1904,4 @@ st_init_texture_functions(struct dd_function_table *functions) functions->AllocTextureStorage = st_AllocTextureStorage; functions->TextureView = st_TextureView; - - functions->CopyImageSubData = st_CopyImageSubData; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 5abb17385c2..6e20fd1fda2 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -44,6 +44,7 @@ #include "st_cb_bufferobjects.h" #include "st_cb_clear.h" #include "st_cb_condrender.h" +#include "st_cb_copyimage.h" #include "st_cb_drawpixels.h" #include "st_cb_rasterpos.h" #include "st_cb_drawtex.h" @@ -430,6 +431,7 @@ void 
st_init_driver_functions(struct pipe_screen *screen, st_init_bufferobject_functions(functions); st_init_clear_functions(functions); st_init_bitmap_functions(functions); + st_init_copy_image_functions(functions); st_init_drawpixels_functions(functions); st_init_rasterpos_functions(functions); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d4724b46e0a..bd7cbccc20c 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -439,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_base_instance), PIPE_CAP_START_INSTANCE }, { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT }, { o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED }, + { o(ARB_copy_image), PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS }, { o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE }, { o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP }, { o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC }, diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 26e1c21f6c5..b3700406df0 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx, numLevels = texObj->BaseLevel + baseImage->MaxNumLevels; numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1); + if (texObj->Immutable) + numLevels = MIN2(numLevels, texObj->NumLevels); assert(numLevels >= 1); return numLevels; @@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, */ stObj->lastLevel = lastLevel; - if (pt->last_level < lastLevel) { - /* The current gallium texture doesn't have space for all the - * mipmap levels we need to generate. So allocate a new texture. 
- */ - struct pipe_resource *oldTex = stObj->pt; - - /* create new texture with space for more levels */ - stObj->pt = st_texture_create(st, - oldTex->target, - oldTex->format, - lastLevel, - oldTex->width0, - oldTex->height0, - oldTex->depth0, - oldTex->array_size, - 0, - oldTex->bind); - - /* This will copy the old texture's base image into the new texture - * which we just allocated. - */ - st_finalize_texture(ctx, st->pipe, texObj); - - /* release the old tex (will likely be freed too) */ - pipe_resource_reference(&oldTex, NULL); - st_texture_release_all_sampler_views(st, stObj); - } - else { - /* Make sure that the base texture image data is present in the - * texture buffer. - */ - st_finalize_texture(ctx, st->pipe, texObj); + if (!texObj->Immutable) { + if (pt->last_level < lastLevel) { + /* The current gallium texture doesn't have space for all the + * mipmap levels we need to generate. So allocate a new texture. + */ + struct pipe_resource *oldTex = stObj->pt; + + /* create new texture with space for more levels */ + stObj->pt = st_texture_create(st, + oldTex->target, + oldTex->format, + lastLevel, + oldTex->width0, + oldTex->height0, + oldTex->depth0, + oldTex->array_size, + 0, + oldTex->bind); + + /* This will copy the old texture's base image into the new texture + * which we just allocated. + */ + st_finalize_texture(ctx, st->pipe, texObj); + + /* release the old tex (will likely be freed too) */ + pipe_resource_reference(&oldTex, NULL); + st_texture_release_all_sampler_views(st, stObj); + } + else { + /* Make sure that the base texture image data is present in the + * texture buffer. + */ + st_finalize_texture(ctx, st->pipe, texObj); + } } pt = stObj->pt; diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c deleted file mode 100644 index 4bd9ac8539e..00000000000 --- a/src/mesa/tnl/t_rasterpos.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "c99_math.h" -#include "main/glheader.h" -#include "main/feedback.h" -#include "main/light.h" -#include "main/macros.h" -#include "util/simple_list.h" -#include "main/mtypes.h" -#include "main/viewport.h" - -#include "math/m_matrix.h" -#include "tnl/tnl.h" - - - -/** - * Clip a point against the view volume. - * - * \param v vertex vector describing the point to clip. - * - * \return zero if outside view volume, or one if inside. - */ -static GLuint -viewclip_point_xy( const GLfloat v[] ) -{ - if ( v[0] > v[3] || v[0] < -v[3] - || v[1] > v[3] || v[1] < -v[3] ) { - return 0; - } - else { - return 1; - } -} - - -/** - * Clip a point against the far/near Z clipping planes. - * - * \param v vertex vector describing the point to clip. - * - * \return zero if outside view volume, or one if inside. 
- */ -static GLuint -viewclip_point_z( const GLfloat v[] ) -{ - if (v[2] > v[3] || v[2] < -v[3] ) { - return 0; - } - else { - return 1; - } -} - - -/** - * Clip a point against the user clipping planes. - * - * \param ctx GL context. - * \param v vertex vector describing the point to clip. - * - * \return zero if the point was clipped, or one otherwise. - */ -static GLuint -userclip_point( struct gl_context *ctx, const GLfloat v[] ) -{ - GLuint p; - - for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { - if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { - GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0] - + v[1] * ctx->Transform._ClipUserPlane[p][1] - + v[2] * ctx->Transform._ClipUserPlane[p][2] - + v[3] * ctx->Transform._ClipUserPlane[p][3]; - if (dot < 0.0F) { - return 0; - } - } - } - - return 1; -} - - -/** - * Compute lighting for the raster position. RGB modes computed. - * \param ctx the context - * \param vertex vertex location - * \param normal normal vector - * \param Rcolor returned color - * \param Rspec returned specular color (if separate specular enabled) - */ -static void -shade_rastpos(struct gl_context *ctx, - const GLfloat vertex[4], - const GLfloat normal[3], - GLfloat Rcolor[4], - GLfloat Rspec[4]) -{ - /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor; - const struct gl_light *light; - GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */ - - COPY_3V(diffuseColor, base[0]); - diffuseColor[3] = CLAMP( - ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F ); - ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0); - - foreach (light, &ctx->Light.EnabledList) { - GLfloat attenuation = 1.0; - GLfloat VP[3]; /* vector from vertex to light pos */ - GLfloat n_dot_VP; - GLfloat diffuseContrib[3], specularContrib[3]; - - if (!(light->_Flags & LIGHT_POSITIONAL)) { - /* light at infinity */ - COPY_3V(VP, light->_VP_inf_norm); - attenuation = light->_VP_inf_spot_attenuation; - } - else { - /* local/positional light */ - 
GLfloat d; - - /* VP = vector from vertex pos to light[i].pos */ - SUB_3V(VP, light->_Position, vertex); - /* d = length(VP) */ - d = (GLfloat) LEN_3FV( VP ); - if (d > 1.0e-6F) { - /* normalize VP */ - GLfloat invd = 1.0F / d; - SELF_SCALE_SCALAR_3V(VP, invd); - } - - /* atti */ - attenuation = 1.0F / (light->ConstantAttenuation + d * - (light->LinearAttenuation + d * - light->QuadraticAttenuation)); - - if (light->_Flags & LIGHT_SPOT) { - GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection); - - if (PV_dot_dir<light->_CosCutoff) { - continue; - } - else { - GLfloat spot = powf(PV_dot_dir, light->SpotExponent); - attenuation *= spot; - } - } - } - - if (attenuation < 1e-3F) - continue; - - n_dot_VP = DOT3( normal, VP ); - - if (n_dot_VP < 0.0F) { - ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]); - continue; - } - - /* Ambient + diffuse */ - COPY_3V(diffuseContrib, light->_MatAmbient[0]); - ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]); - - /* Specular */ - { - const GLfloat *h; - GLfloat n_dot_h; - - ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0); - - if (ctx->Light.Model.LocalViewer) { - GLfloat v[3]; - COPY_3V(v, vertex); - NORMALIZE_3FV(v); - SUB_3V(VP, VP, v); - NORMALIZE_3FV(VP); - h = VP; - } - else if (light->_Flags & LIGHT_POSITIONAL) { - ACC_3V(VP, ctx->_EyeZDir); - NORMALIZE_3FV(VP); - h = VP; - } - else { - h = light->_h_inf_norm; - } - - n_dot_h = DOT3(normal, h); - - if (n_dot_h > 0.0F) { - GLfloat shine; - GLfloat spec_coef; - - shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; - spec_coef = powf(n_dot_h, shine); - - if (spec_coef > 1.0e-10F) { - if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) { - ACC_SCALE_SCALAR_3V( specularContrib, spec_coef, - light->_MatSpecular[0]); - } - else { - ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef, - light->_MatSpecular[0]); - } - } - } - } - - ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib ); - ACC_SCALE_SCALAR_3V( 
specularColor, attenuation, specularContrib ); - } - - Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F); - Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F); - Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F); - Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F); - Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F); - Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F); - Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F); - Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F); -} - - -/** - * Do texgen needed for glRasterPos. - * \param ctx rendering context - * \param vObj object-space vertex coordinate - * \param vEye eye-space vertex coordinate - * \param normal vertex normal - * \param unit texture unit number - * \param texcoord incoming texcoord and resulting texcoord - */ -static void -compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4], - const GLfloat normal[3], GLuint unit, GLfloat texcoord[4]) -{ - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - - /* always compute sphere map terms, just in case */ - GLfloat u[3], two_nu, rx, ry, rz, m, mInv; - COPY_3V(u, vEye); - NORMALIZE_3FV(u); - two_nu = 2.0F * DOT3(normal, u); - rx = u[0] - normal[0] * two_nu; - ry = u[1] - normal[1] * two_nu; - rz = u[2] - normal[2] * two_nu; - m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F); - if (m > 0.0F) - mInv = 0.5F * (1.0f / sqrtf(m)); - else - mInv = 0.0F; - - if (texUnit->TexGenEnabled & S_BIT) { - switch (texUnit->GenS.Mode) { - case GL_OBJECT_LINEAR: - texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane); - break; - case GL_SPHERE_MAP: - texcoord[0] = rx * mInv + 0.5F; - break; - case GL_REFLECTION_MAP: - texcoord[0] = rx; - break; - case GL_NORMAL_MAP: - texcoord[0] = normal[0]; - break; - default: - _mesa_problem(ctx, "Bad S texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & T_BIT) { - switch (texUnit->GenT.Mode) { - case 
GL_OBJECT_LINEAR: - texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane); - break; - case GL_SPHERE_MAP: - texcoord[1] = ry * mInv + 0.5F; - break; - case GL_REFLECTION_MAP: - texcoord[1] = ry; - break; - case GL_NORMAL_MAP: - texcoord[1] = normal[1]; - break; - default: - _mesa_problem(ctx, "Bad T texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & R_BIT) { - switch (texUnit->GenR.Mode) { - case GL_OBJECT_LINEAR: - texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane); - break; - case GL_REFLECTION_MAP: - texcoord[2] = rz; - break; - case GL_NORMAL_MAP: - texcoord[2] = normal[2]; - break; - default: - _mesa_problem(ctx, "Bad R texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & Q_BIT) { - switch (texUnit->GenQ.Mode) { - case GL_OBJECT_LINEAR: - texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane); - break; - default: - _mesa_problem(ctx, "Bad Q texgen in compute_texgen()"); - return; - } - } -} - - -/** - * glRasterPos transformation. Typically called via ctx->Driver.RasterPos(). - * XXX some of this code (such as viewport xform, clip testing and setting - * of ctx->Current.Raster* fields) could get lifted up into the - * main/rasterpos.c code. 
- * - * \param vObj vertex position in object space - */ -void -_tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) -{ - if (ctx->VertexProgram._Enabled) { - /* XXX implement this */ - _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos"); - return; - } - else { - GLfloat eye[4], clip[4], ndc[3], d; - GLfloat *norm, eyenorm[3]; - GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; - float scale[3], translate[3]; - - /* apply modelview matrix: eye = MV * obj */ - TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); - /* apply projection matrix: clip = Proj * eye */ - TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye ); - - /* clip to view volume. */ - if (!ctx->Transform.DepthClamp) { - if (viewclip_point_z(clip) == 0) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - } - if (!ctx->Transform.RasterPositionUnclipped) { - if (viewclip_point_xy(clip) == 0) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - } - - /* clip to user clipping planes */ - if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - - /* ndc = clip / W */ - d = (clip[3] == 0.0F) ? 
1.0F : 1.0F / clip[3]; - ndc[0] = clip[0] * d; - ndc[1] = clip[1] * d; - ndc[2] = clip[2] * d; - /* wincoord = viewport_mapping(ndc) */ - _mesa_get_viewport_xform(ctx, 0, scale, translate); - ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0]; - ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1]; - ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2]; - ctx->Current.RasterPos[3] = clip[3]; - - if (ctx->Transform.DepthClamp) { - ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3], - ctx->ViewportArray[0].Near, - ctx->ViewportArray[0].Far); - } - - /* compute raster distance */ - if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT) - ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0]; - else - ctx->Current.RasterDistance = - sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] ); - - /* compute transformed normal vector (for lighting or texgen) */ - if (ctx->_NeedEyeCoords) { - const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv; - TRANSFORM_NORMAL( eyenorm, objnorm, inv ); - norm = eyenorm; - } - else { - norm = objnorm; - } - - /* update raster color */ - if (ctx->Light.Enabled) { - /* lighting */ - shade_rastpos( ctx, vObj, norm, - ctx->Current.RasterColor, - ctx->Current.RasterSecondaryColor ); - } - else { - /* use current color */ - COPY_4FV(ctx->Current.RasterColor, - ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); - COPY_4FV(ctx->Current.RasterSecondaryColor, - ctx->Current.Attrib[VERT_ATTRIB_COLOR1]); - } - - /* texture coords */ - { - GLuint u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - GLfloat tc[4]; - COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]); - if (ctx->Texture.Unit[u].TexGenEnabled) { - compute_texgen(ctx, vObj, eye, norm, u, tc); - } - TRANSFORM_POINT(ctx->Current.RasterTexCoords[u], - ctx->TextureMatrixStack[u].Top->m, tc); - } - } - - ctx->Current.RasterPosValid = GL_TRUE; - } - - if (ctx->RenderMode == GL_SELECT) { - _mesa_update_hitflag( ctx, 
ctx->Current.RasterPos[2] ); - } -} diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index e6b9d890d5f..6293a8b9edc 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -207,7 +207,8 @@ vbo_compute_max_verts(const struct vbo_exec_context *exec) { unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / (exec->vtx.vertex_size * sizeof(GLfloat)); - assert(n > 0); + if (n == 0) + return 0; /* Subtract one so we're always sure to have room for an extra * vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion. */ diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index a23d5aa08aa..a614b26cae4 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -132,8 +132,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) static void vbo_exec_vtx_wrap(struct vbo_exec_context *exec) { - fi_type *data = exec->vtx.copied.buffer; - GLuint i; + unsigned numComponents; /* Run pipeline on current vertices, copy wrapped vertices * to exec->vtx.copied. 
@@ -149,13 +148,12 @@ vbo_exec_vtx_wrap(struct vbo_exec_context *exec) */ assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr); - for (i = 0 ; i < exec->vtx.copied.nr ; i++) { - memcpy( exec->vtx.buffer_ptr, data, - exec->vtx.vertex_size * sizeof(GLfloat)); - exec->vtx.buffer_ptr += exec->vtx.vertex_size; - data += exec->vtx.vertex_size; - exec->vtx.vert_count++; - } + numComponents = exec->vtx.copied.nr * exec->vtx.vertex_size; + memcpy(exec->vtx.buffer_ptr, + exec->vtx.copied.buffer, + numComponents * sizeof(fi_type)); + exec->vtx.buffer_ptr += numComponents; + exec->vtx.vert_count += exec->vtx.copied.nr; exec->vtx.copied.nr = 0; } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 34d2c1d3d6b..e27fdd90532 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -1807,13 +1807,20 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx, SET_EvalMesh2(exec, vbo_exec_EvalMesh2); } - if (_mesa_is_desktop_gl(ctx)) { + if (ctx->API != API_OPENGLES && + ctx->Extensions.ARB_draw_elements_base_vertex) { SET_DrawElementsBaseVertex(exec, vbo_exec_DrawElementsBaseVertex); - SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex); SET_MultiDrawElementsBaseVertex(exec, vbo_exec_MultiDrawElementsBaseVertex); + + if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { + SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex); + SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex); + } + } + + if (_mesa_is_desktop_gl(ctx)) { SET_DrawArraysInstancedBaseInstance(exec, vbo_exec_DrawArraysInstancedBaseInstance); SET_DrawElementsInstancedBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseInstance); - SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex); SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance); } diff --git a/src/mesa/vbo/vbo_save_api.c 
b/src/mesa/vbo/vbo_save_api.c index d49aa15b1b7..97a1dfdeb3f 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -601,8 +601,7 @@ static void _save_wrap_filled_vertex(struct gl_context *ctx) { struct vbo_save_context *save = &vbo_context(ctx)->save; - fi_type *data = save->copied.buffer; - GLuint i; + unsigned numComponents; /* Emit a glEnd to close off the last vertex list. */ @@ -612,12 +611,12 @@ _save_wrap_filled_vertex(struct gl_context *ctx) */ assert(save->max_vert - save->vert_count > save->copied.nr); - for (i = 0; i < save->copied.nr; i++) { - memcpy(save->buffer_ptr, data, save->vertex_size * sizeof(GLfloat)); - data += save->vertex_size; - save->buffer_ptr += save->vertex_size; - save->vert_count++; - } + numComponents = save->copied.nr * save->vertex_size; + memcpy(save->buffer_ptr, + save->copied.buffer, + numComponents * sizeof(fi_type)); + save->buffer_ptr += numComponents; + save->vert_count += save->copied.nr; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 3ab2a245de4..26d0fe57a42 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -74,7 +74,7 @@ anv_physical_device_init(struct anv_physical_device *device, } device->name = brw_get_device_name(device->chipset_id); - device->info = brw_get_device_info(device->chipset_id, -1); + device->info = brw_get_device_info(device->chipset_id); if (!device->info) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "failed to get device info"); |