diff options
330 files changed, 13633 insertions, 6211 deletions
diff --git a/Makefile.am b/Makefile.am index 149610c7c69..a9ed31ee123 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,6 +32,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \ --enable-vdpau \ --enable-xa \ --enable-xvmc \ + --disable-llvm-shared-libs \ --with-egl-platforms=x11,wayland,drm \ --with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \ --with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast @@ -1 +1 @@ -11.1.0-devel +11.2.0-devel diff --git a/configure.ac b/configure.ac index 758751c4b94..ebea7fcb61a 100644 --- a/configure.ac +++ b/configure.ac @@ -768,6 +768,11 @@ linux*) dri3_default=no ;; esac + +if test "x$enable_dri" = xno; then + dri3_default=no +fi + AC_ARG_ENABLE([dri3], [AS_HELP_STRING([--enable-dri3], [enable DRI3 @<:@default=auto@:>@])], @@ -957,8 +962,13 @@ gnu*|cygwin*) dri_platform='drm' ;; esac +if test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes; then + have_drisw_kms='yes' +fi + AM_CONDITIONAL(HAVE_DRICOMMON, test "x$enable_dri" = xyes ) AM_CONDITIONAL(HAVE_DRISW, test "x$enable_dri" = xyes ) +AM_CONDITIONAL(HAVE_DRISW_KMS, test "x$have_drisw_kms" = xyes ) AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes ) AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes ) AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes -a "x$dri_platform" = xapple ) @@ -993,10 +1003,6 @@ if test -n "$with_gallium_drivers" -a "x$enable_glx$enable_xlib_glx" = xyesyes; NEED_WINSYS_XLIB="yes" fi -if test "x$enable_dri" = xyes; then - enable_gallium_loader="$enable_shared_pipe_drivers" -fi - if test "x$enable_gallium_osmesa" = xyes; then if ! 
echo "$with_gallium_drivers" | grep -q 'swrast'; then AC_MSG_ERROR([gallium_osmesa requires the gallium swrast driver]) @@ -1227,7 +1233,8 @@ xyesno) if test x"$enable_dri3" = xyes; then PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED])) - dri_modules="$dri_modules xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" + dri3_modules="xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED" + PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules]) fi fi if test x"$dri_platform" = xapple ; then @@ -1570,6 +1577,12 @@ if test "x$enable_egl" = xyes; then if test "x$enable_shared_glapi" = xno; then AC_MSG_ERROR([egl_dri2 requires --enable-shared-glapi]) fi + if test "x$enable_dri3" = xyes; then + HAVE_EGL_DRIVER_DRI3=1 + if test "x$enable_shared_glapi" = xno; then + AC_MSG_ERROR([egl_dri3 requires --enable-shared-glapi]) + fi + fi else # Avoid building an "empty" libEGL. Drop/update this # when other backends (haiku?) come along. @@ -1595,7 +1608,6 @@ if test "x$enable_xa" = xyes; then enabling XA. 
Example: ./configure --enable-xa --with-gallium-drivers=svga...]) fi - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st xa" fi AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes) @@ -1641,28 +1653,24 @@ AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes) if test "x$enable_xvmc" = xyes; then PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED]) - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st xvmc" fi AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes) if test "x$enable_vdpau" = xyes; then PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED]) - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st vdpau" fi AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes) if test "x$enable_omx" = xyes; then PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED]) - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st omx" fi AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes) if test "x$enable_va" = xyes; then PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED]) - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st va" fi AM_CONDITIONAL(HAVE_ST_VA, test "x$enable_va" = xyes) @@ -1685,7 +1693,6 @@ if test "x$enable_nine" = xyes; then AC_MSG_WARN([using nine together with wine requires DRI3 enabled system]) fi - enable_gallium_loader=$enable_shared_pipe_drivers gallium_st="$gallium_st nine" fi AM_CONDITIONAL(HAVE_ST_NINE, test "x$enable_nine" = xyes) @@ -1724,8 +1731,6 @@ if test "x$enable_opencl" = xyes; then AC_SUBST([LIBCLC_LIBEXECDIR]) fi - # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers - enable_gallium_loader=yes gallium_st="$gallium_st clover" if test "x$enable_opencl_icd" = xyes; then @@ -2006,10 +2011,6 @@ AC_SUBST([XVMC_LIB_INSTALL_DIR]) dnl dnl Gallium Tests dnl -if test "x$enable_gallium_tests" = xyes; then - # XXX: Use $enable_shared_pipe_drivers once 
converted to use static/shared pipe-drivers - enable_gallium_loader=yes -fi AM_CONDITIONAL(HAVE_GALLIUM_TESTS, test "x$enable_gallium_tests" = xyes) dnl Directory for VDPAU libs @@ -2064,14 +2065,8 @@ gallium_require_llvm() { } gallium_require_drm_loader() { - if test "x$enable_gallium_loader" = xyes; then - if test "x$need_pci_id$have_pci_id" = xyesno; then - AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs]) - fi - enable_gallium_drm_loader=yes - fi - if test "x$enable_va" = xyes && test "x$7" != x; then - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $7" + if test "x$need_pci_id$have_pci_id" = xyesno; then + AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs]) fi } @@ -2186,7 +2181,9 @@ if test -n "$with_gallium_drivers"; then gallium_require_drm_loader PKG_CHECK_MODULES([SIMPENROSE], [simpenrose], - [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no]) + [USE_VC4_SIMULATOR=yes; + DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"], + [USE_VC4_SIMULATOR=no]) ;; xvirgl) HAVE_GALLIUM_VIRGL=yes @@ -2269,22 +2266,14 @@ AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno) -# NOTE: anything using xcb or other client side libs ends up in separate -# _CLIENT variables. The pipe loader is built in two variants, -# one that is standalone and does not link any x client libs (for -# use by XA tracker in particular, but could be used in any case -# where communication with xserver is not desired). 
-if test "x$enable_gallium_loader" = xyes; then - if test "x$enable_dri" = xyes; then - GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI" - fi - - if test "x$enable_gallium_drm_loader" = xyes; then - GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM" - fi +if test "x$enable_dri" = xyes; then + GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI" +fi - AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES]) +if test "x$have_drisw_kms" = xyes; then + GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS" fi +AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES]) AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes) AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes) @@ -2299,8 +2288,6 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \ AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$NEED_WINSYS_XLIB" = xyes) AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes) AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes) -AM_CONDITIONAL(HAVE_LOADER_GALLIUM, test x$enable_gallium_loader = xyes) -AM_CONDITIONAL(HAVE_DRM_LOADER_GALLIUM, test x$enable_gallium_drm_loader = xyes) AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes) AM_CONDITIONAL(HAVE_MESA_LLVM, test x$MESA_LLVM = x1) AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes) @@ -2532,6 +2519,9 @@ if test "$enable_egl" = yes; then if test "x$HAVE_EGL_DRIVER_DRI2" != "x"; then egl_drivers="$egl_drivers builtin:egl_dri2" fi + if test "x$HAVE_EGL_DRIVER_DRI3" != "x"; then + egl_drivers="$egl_drivers builtin:egl_dri3" + fi echo " EGL drivers: $egl_drivers" fi diff --git a/docs/GL3.txt b/docs/GL3.txt index b768eea789a..ad6b95e992b 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -179,7 +179,7 @@ GL 4.4, GLSL 4.40: GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_clear_texture DONE (i965, nv50, nvc0) GL_ARB_enhanced_layouts in 
progress (Timothy) - - compile-time constant expressions in progress + - compile-time constant expressions DONE - explicit byte offsets for blocks in progress - forced alignment within blocks in progress - specified vec4-slot component numbers in progress diff --git a/docs/envvars.html b/docs/envvars.html index bdfe9991a6c..1b2c03ef377 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -179,6 +179,14 @@ Mesa EGL supports different sets of environment variables. See the <li>GALLIUM_HUD - draws various information on the screen, like framerate, cpu load, driver statistics, performance counters, etc. Set GALLIUM_HUD=help and run e.g. glxgears for more info. +<li>GALLIUM_HUD_PERIOD - sets the hud update rate in seconds (float). Use zero + to update every frame. The default period is 1/2 second. +<li>GALLIUM_HUD_VISIBLE - control default visibility, defaults to true. +<li>GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal. + Especially useful to toggle hud at specific points of application and + disable for unencumbered viewing the rest of the time. For example, set + GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_TOGGLE_SIGNAL to 10 (SIGUSR1). + Use kill -10 &lt;pid&gt; to toggle the hud as desired. <li>GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc. rather than stderr. <li>GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment diff --git a/docs/index.html b/docs/index.html index 0e317fdf411..1f50d6b40d3 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@ <h1>News</h1> +<h2>November 21, 2015</h2> +<p> +<a href="relnotes/11.0.6.html">Mesa 11.0.6</a> is released. +This is a bug-fix release. +</p> + <h2>November 11, 2015</h2> <p> <a href="relnotes/11.0.5.html">Mesa 11.0.5</a> is released. 
diff --git a/docs/relnotes.html b/docs/relnotes.html index 588cf0b2c83..6a8ed324eb5 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release. </p> <ul> +<li><a href="relnotes/11.0.6.html">11.0.6 release notes</a> <li><a href="relnotes/11.0.5.html">11.0.5 release notes</a> <li><a href="relnotes/11.0.4.html">11.0.4 release notes</a> <li><a href="relnotes/11.0.3.html">11.0.3 release notes</a> diff --git a/docs/relnotes/11.0.6.html b/docs/relnotes/11.0.6.html new file mode 100644 index 00000000000..4a1083c43b2 --- /dev/null +++ b/docs/relnotes/11.0.6.html @@ -0,0 +1,145 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.0.6 Release Notes / November 21, 2015</h1> + +<p> +Mesa 11.0.6 is a bug fix release which fixes bugs found since the 11.0.5 release. +</p> +<p> +Mesa 11.0.6 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. 
+</p> + + +<h2>SHA256 checksums</h2> +<pre> +4bdf054af66ebabf3eca0616f9f5e44c2f234695661b570261c391bc2f4f7482 mesa-11.0.6.tar.gz +8340e64cdc91999840404c211496f3de38e7b4cb38db34e2f72f1642c5134760 mesa-11.0.6.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91780">Bug 91780</a> - Rendering issues with geometry shader</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92588">Bug 92588</a> - [HSW,BDW,BSW,SKL-Y][GLES 3.1 CTS] ES31-CTS.arrays_of_arrays.InteractionFunctionCalls2 - assert</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92738">Bug 92738</a> - Randon R7 240 doesn't work on 16KiB page size platform</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92860">Bug 92860</a> - [radeonsi][bisected] st/mesa: implement ARB_copy_image - Corruption in ARK Survival Evolved</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92900">Bug 92900</a> - [regression bisected] About 700 piglit regressions is what could go wrong</li> + +</ul> + + +<h2>Changes</h2> + +<p>Alex Deucher (1):</p> +<ul> + <li>radeonsi: enable optimal raster config setting for fiji (v2)</li> +</ul> + +<p>Ben Widawsky (1):</p> +<ul> + <li>i965/skl/gt4: Fix URB programming restriction.</li> +</ul> + +<p>Boyuan Zhang (2):</p> +<ul> + <li>st/vaapi: fix vaapi VC-1 simple/main corruption v2</li> + <li>radeon/uvd: fix VC-1 simple/main profile decode v2</li> +</ul> + +<p>Dave Airlie (1):</p> +<ul> + <li>r600: initialised PGM_RESOURCES_2 for ES/GS</li> +</ul> + +<p>Emil Velikov (4):</p> +<ul> + <li>docs: add sha256 checksums for 11.0.5</li> + <li>cherry-ignore: add the swrast front buffer support</li> + <li>automake: use static llvm for make distcheck</li> + <li>Update version to 11.0.6</li> +</ul> + +<p>Eric Anholt (3):</p> +<ul> + <li>vc4: Return GL_OUT_OF_MEMORY when buffer allocation fails.</li> + <li>vc4: 
Return NULL when we can't make our shadow for a sampler view.</li> + <li>vc4: Add support for nir_op_uge, using the carry bit on QPU_A_SUB.</li> +</ul> + +<p>Ian Romanick (2):</p> +<ul> + <li>meta/generate_mipmap: Don't leak the sampler object</li> + <li>meta/generate_mipmap: Only modify the draw framebuffer binding in fallback_required</li> +</ul> + +<p>Ilia Mirkin (2):</p> +<ul> + <li>mesa/copyimage: allow width/height to not be multiples of block</li> + <li>nouveau: don't expose HEVC decoding support</li> +</ul> + +<p>Jason Ekstrand (1):</p> +<ul> + <li>nir/vars_to_ssa: Rework copy set handling in lower_copies_to_load_store</li> +</ul> + +<p>Kenneth Graunke (1):</p> +<ul> + <li>glsl: Allow implicit int -> uint conversions for the % operator.</li> +</ul> + +<p>Marek Olšák (1):</p> +<ul> + <li>radeonsi: initialize SX_PS_DOWNCONVERT to 0 on Stoney</li> +</ul> + +<p>Michel Dänzer (1):</p> +<ul> + <li>winsys/radeon: Use CPU page size instead of hardcoding 4096 bytes v3</li> +</ul> + +<p>Oded Gabbay (1):</p> +<ul> + <li>llvmpipe: use simple coeffs calc for 128bit vectors</li> +</ul> + +<p>Roland Scheidegger (2):</p> +<ul> + <li>radeon: fix bgrx8/xrgb8 blits</li> + <li>r200: fix bgrx8/xrgb8 blits</li> +</ul> + + +</div> +</body> +</html> diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 82ee3c4037b..77df804d901 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -44,25 +44,41 @@ Note: some of the new features are only available with certain drivers. </p> <ul> +<li>OpenGL 3.1 support on freedreno (a3xx, a4xx)</li> +<li>OpenGL 3.3 support for VMware guest VM driver (supported by Workstation 12 + and Fusion 8). 
+<li>GL_AMD_performance_monitor on nv50</li> <li>GL_ARB_arrays_of_arrays on i965</li> <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li> <li>GL_ARB_clear_texture on nv50, nvc0</li> +<li>GL_ARB_clip_control on freedreno/a4xx</li> <li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li> +<li>GL_ARB_depth_clamp on freedreno/a4xx</li> <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li> <li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li> +<li>GL_ARB_seamless_cubemap_per_texture on freedreno/a4xx</li> <li>GL_ARB_shader_clock on i965 (gen7+)</li> <li>GL_ARB_shader_stencil_export on i965 (gen9+)</li> <li>GL_ARB_shader_storage_buffer_object on i965</li> <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li> <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li> +<li>GL_ARB_texture_buffer_range on freedreno/a3xx</li> +<li>GL_ARB_texture_compression_bptc on freedreno/a4xx</li> <li>GL_ARB_texture_query_lod on softpipe</li> <li>GL_ARB_texture_view on radeonsi and r600 (for evergeen and newer)</li> +<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx, a4xx)</li> +<li>GL_EXT_blend_func_extended on all drivers that support the ARB version</li> <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li> <li>GL_EXT_draw_elements_base_vertex on all drivers</li> +<li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx &amp; a4xx)</li> +<li>GL_KHR_debug (GLES)</li> +<li>GL_NV_conditional_render on freedreno</li> <li>GL_OES_draw_elements_base_vertex on all drivers</li> <li>EGL_KHR_create_context on softpipe, llvmpipe</li> <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li> <li>new virgl gallium driver for qemu virtio-gpu</li> +<li>16x multisampling on i965 (gen9+)</li> +<li>GL_EXT_shader_samples_identical on i965.</li> </ul> <h2>Bug fixes</h2> diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html new file mode 100644 index 00000000000..20f4543a562 --- /dev/null +++ 
b/docs/relnotes/11.2.0.html @@ -0,0 +1,67 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.2.0 Release Notes / TBD</h1> + +<p> +Mesa 11.2.0 is a new development release. +People who are concerned with stability and reliability should stick +with a previous release or wait for Mesa 11.2.1. +</p> +<p> +Mesa 11.2.0 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD. +</pre> + + +<h2>New features</h2> + +<p> +Note: some of the new features are only available with certain drivers. +</p> + +<ul> +<li>GL_ARB_base_instance on freedreno/a4xx</li> +<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li> +<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li> +<li>GL_ARB_texture_query_lod on freedreno/a4xx</li> +<li>GL_ARB_texture_rgb10_a2ui on freedreno/a4xx</li> +<li>GL_ARB_texture_view on freedreno/a4xx</li> +<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li> +<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li> +</ul> + +<h2>Bug fixes</h2> + +TBD. + +<h2>Changes</h2> + +TBD. 
+ +</div> +</body> +</html> diff --git a/docs/specs/EXT_shader_samples_identical.txt b/docs/specs/EXT_shader_samples_identical.txt new file mode 100644 index 00000000000..a8a901b8bbd --- /dev/null +++ b/docs/specs/EXT_shader_samples_identical.txt @@ -0,0 +1,176 @@ +Name + + EXT_shader_samples_identical + +Name Strings + + GL_EXT_shader_samples_identical + +Contact + + Ian Romanick, Intel (ian.d.romanick 'at' intel.com) + +Contributors + + Chris Forbes, Mesa + Magnus Wendt, Intel + Neil S. Roberts, Intel + Graham Sellers, AMD + +Status + + XXX - Not complete yet. + +Version + + Last Modified Date: November 19, 2015 + Revision: 6 + +Number + + TBD + +Dependencies + + OpenGL 3.2, or OpenGL ES 3.1, or ARB_texture_multisample is required. + + This extension is written against the OpenGL 4.5 (Core Profile) + Specification + +Overview + + Multisampled antialiasing has become a common method for improving the + quality of rendered images. Multisampling differs from supersampling in + that the color of a primitive that covers all or part of a pixel is + resolved once, regardless of the number of samples covered. If a large + polygon is rendered, the colors of all samples in each interior pixel will + be the same. This suggests a simple compression scheme that can reduce + the necessary memory bandwidth requirements. In one such scheme, each + sample is stored in a separate slice of the multisample surface. An + additional multisample control surface (MCS) contains a mapping from pixel + samples to slices. + + If all the values stored in the MCS for a particular pixel are the same, + then all the samples have the same value. Applications can take advantage + of this information to reduce the bandwidth of reading multisample + textures. A custom multisample resolve filter could optimize resolving + pixels where every sample is identical by reading the color once. 
+ + color = texelFetch(sampler, coordinate, 0); + if (!textureSamplesIdenticalEXT(sampler, coordinate)) { + for (int i = 1; i < MAX_SAMPLES; i++) { + vec4 c = texelFetch(sampler, coordinate, i); + + //... accumulate c into color + + } + } + +New Procedures and Functions + + None. + +New Tokens + + None. + +Additions to the OpenGL 4.5 (Core Profile) Specification + + None. + +Modifications to The OpenGL Shading Language Specification, Version 4.50.5 + + Including the following line in a shader can be used to control the + language features described in this extension: + + #extension GL_EXT_shader_samples_identical + + A new preprocessor #define is added to the OpenGL Shading Language: + + #define GL_EXT_shader_samples_identical + + Add to the table in section 8.7 "Texture Lookup Functions" + + Syntax: + + bool textureSamplesIdenticalEXT(gsampler2DMS sampler, ivec2 coord) + + bool textureSamplesIdenticalEXT(gsampler2DMSArray sampler, + ivec3 coord) + + Description: + + Returns true if it can be determined that all samples within the texel + of the multisample texture bound to <sampler> at <coord> contain the + same values or false if this cannot be determined. + +Additions to the AGL/EGL/GLX/WGL Specifications + + None + +Errors + + None + +New State + + None + +New Implementation Dependent State + + None + +Issues + + 1) What should the new functions be called? + + RESOLVED: textureSamplesIdenticalEXT. Initially + textureAllSamplesIdenticalEXT was considered, but + textureSamplesIdenticalEXT is more similar to the existing textureSamples + function. + + 2) It seems like applications could implement additional optimization if + they were provided with raw MCS data. Should this extension also + provide that data? + + There are a number of challenges in providing raw MCS data. The biggest + problem is that the amount of MCS data depends on the number of + samples, and that is not known at compile time. 
Additionally, without new + texelFetch functions, applications would have difficulty utilizing the + information. + + Another option is to have a function that returns an array of tuples of + sample number and count. This also has difficulties with the maximum + array size not being known at compile time. + + RESOLVED: Do not expose raw MCS data in this extension. + + 3) Should this extension also extend SPIR-V? + + RESOLVED: Yes, but this has not yet been written. + + 4) Is it possible for textureSamplesIdenticalEXT to report false negatives? + + RESOLVED: Yes. It is possible that the underlying hardware may not detect + that separate writes of the same color to different samples of a pixel are + the same. The shader function is at the whim of the underlying hardware + implementation. It is also possible that a compressed multisample surface + is not used. In that case the function will likely always return false. + +Revision History + + Rev Date Author Changes + --- ---------- -------- --------------------------------------------- + 1 2014/08/20 cforbes Initial version + 2 2015/10/23 idr Change from MESA to EXT. Rebase on OpenGL 4.5, + and add dependency on OpenGL ES 3.1. Initial + draft of overview section and issues 1 through + 3. + 3 2015/10/27 idr Typo fixes. + 4 2015/11/10 idr Rename extension from EXT_shader_multisample_compression + to EXT_shader_samples_identical. + Add issue #4. + 5 2015/11/18 idr Fix some typos spotted by gsellers. Change the + name of the function to + textureSamplesIdenticalEXT. + 6 2015/11/19 idr Fix more typos spotted by Nicolai Hähnle. 
diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am index 5c2ba301ffb..6953d44e607 100644 --- a/src/egl/Makefile.am +++ b/src/egl/Makefile.am @@ -47,12 +47,21 @@ libEGL_la_LDFLAGS = \ $(LD_NO_UNDEFINED) dri2_backend_FILES = +dri3_backend_FILES = if HAVE_EGL_PLATFORM_X11 AM_CFLAGS += -DHAVE_X11_PLATFORM AM_CFLAGS += $(XCB_DRI2_CFLAGS) libEGL_la_LIBADD += $(XCB_DRI2_LIBS) dri2_backend_FILES += drivers/dri2/platform_x11.c + +if HAVE_DRI3 +dri3_backend_FILES += \ + drivers/dri2/platform_x11_dri3.c \ + drivers/dri2/platform_x11_dri3.h + +libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la +endif endif if HAVE_EGL_PLATFORM_WAYLAND @@ -88,7 +97,8 @@ AM_CFLAGS += \ libEGL_la_SOURCES += \ $(dri2_backend_core_FILES) \ - $(dri2_backend_FILES) + $(dri2_backend_FILES) \ + $(dri3_backend_FILES) libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS) @@ -111,7 +121,10 @@ egl_HEADERS = \ $(top_srcdir)/include/EGL/eglmesaext.h \ $(top_srcdir)/include/EGL/eglplatform.h +TESTS = egl-symbols-check + EXTRA_DIST = \ + egl-symbols-check \ SConscript \ drivers/haiku \ docs \ diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 4cc5f231333..d34b16119e2 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -352,6 +352,12 @@ struct dri2_extension_match { int offset; }; +static struct dri2_extension_match dri3_driver_extensions[] = { + { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) }, + { __DRI_IMAGE_DRIVER, 1, offsetof(struct dri2_egl_display, image_driver) }, + { NULL, 0, 0 } +}; + static struct dri2_extension_match dri2_driver_extensions[] = { { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) }, { __DRI_DRI2, 2, offsetof(struct dri2_egl_display, dri2) }, @@ -385,13 +391,13 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy, void *field; for (i = 0; extensions[i]; i++) { - _eglLog(_EGL_DEBUG, "DRI2: found extension `%s'", 
extensions[i]->name); + _eglLog(_EGL_DEBUG, "found extension `%s'", extensions[i]->name); for (j = 0; matches[j].name; j++) { if (strcmp(extensions[i]->name, matches[j].name) == 0 && extensions[i]->version >= matches[j].version) { field = ((char *) dri2_dpy + matches[j].offset); *(const __DRIextension **) field = extensions[i]; - _eglLog(_EGL_INFO, "DRI2: found extension %s version %d", + _eglLog(_EGL_INFO, "found extension %s version %d", extensions[i]->name, extensions[i]->version); } } @@ -400,7 +406,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy, for (j = 0; matches[j].name; j++) { field = ((char *) dri2_dpy + matches[j].offset); if (*(const __DRIextension **) field == NULL) { - _eglLog(_EGL_WARNING, "DRI2: did not find extension %s version %d", + _eglLog(_EGL_WARNING, "did not find extension %s version %d", matches[j].name, matches[j].version); ret = EGL_FALSE; } @@ -494,6 +500,25 @@ dri2_open_driver(_EGLDisplay *disp) } EGLBoolean +dri2_load_driver_dri3(_EGLDisplay *disp) +{ + struct dri2_egl_display *dri2_dpy = disp->DriverData; + const __DRIextension **extensions; + + extensions = dri2_open_driver(disp); + if (!extensions) + return EGL_FALSE; + + if (!dri2_bind_extensions(dri2_dpy, dri3_driver_extensions, extensions)) { + dlclose(dri2_dpy->driver); + return EGL_FALSE; + } + dri2_dpy->driver_extensions = extensions; + + return EGL_TRUE; +} + +EGLBoolean dri2_load_driver(_EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy = disp->DriverData; @@ -550,7 +575,9 @@ dri2_setup_screen(_EGLDisplay *disp) struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int api_mask; - if (dri2_dpy->dri2) { + if (dri2_dpy->image_driver) { + api_mask = dri2_dpy->image_driver->getAPIMask(dri2_dpy->dri_screen); + } else if (dri2_dpy->dri2) { api_mask = dri2_dpy->dri2->getAPIMask(dri2_dpy->dri_screen); } else { assert(dri2_dpy->swrast); @@ -570,7 +597,7 @@ dri2_setup_screen(_EGLDisplay *disp) if (api_mask & (1 << __DRI_API_GLES3)) disp->ClientAPIs |= 
EGL_OPENGL_ES3_BIT_KHR; - assert(dri2_dpy->dri2 || dri2_dpy->swrast); + assert(dri2_dpy->image_driver || dri2_dpy->dri2 || dri2_dpy->swrast); disp->Extensions.KHR_surfaceless_context = EGL_TRUE; disp->Extensions.MESA_configless_context = EGL_TRUE; @@ -578,7 +605,8 @@ dri2_setup_screen(_EGLDisplay *disp) __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB)) disp->Extensions.KHR_gl_colorspace = EGL_TRUE; - if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) || + if (dri2_dpy->image_driver || + (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) || (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) { disp->Extensions.KHR_create_context = EGL_TRUE; @@ -641,7 +669,14 @@ dri2_create_screen(_EGLDisplay *disp) dri2_dpy = disp->DriverData; - if (dri2_dpy->dri2) { + if (dri2_dpy->image_driver) { + dri2_dpy->dri_screen = + dri2_dpy->image_driver->createNewScreen2(0, dri2_dpy->fd, + dri2_dpy->extensions, + dri2_dpy->driver_extensions, + &dri2_dpy->driver_configs, + disp); + } else if (dri2_dpy->dri2) { if (dri2_dpy->dri2->base.version >= 4) { dri2_dpy->dri_screen = dri2_dpy->dri2->createNewScreen2(0, dri2_dpy->fd, @@ -677,7 +712,7 @@ dri2_create_screen(_EGLDisplay *disp) extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen); - if (dri2_dpy->dri2) { + if (dri2_dpy->image_driver || dri2_dpy->dri2) { if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions)) goto cleanup_dri_screen; } else { @@ -1024,7 +1059,26 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, else dri_config = NULL; - if (dri2_dpy->dri2) { + if (dri2_dpy->image_driver) { + unsigned error; + unsigned num_attribs = 8; + uint32_t ctx_attribs[8]; + + if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs, + &num_attribs)) + goto cleanup; + + dri2_ctx->dri_context = + dri2_dpy->image_driver->createContextAttribs(dri2_dpy->dri_screen, + api, + dri_config, + shared, + num_attribs / 2, + ctx_attribs, + & error, + dri2_ctx); + 
dri2_create_context_attribs_error(error); + } else if (dri2_dpy->dri2) { if (dri2_dpy->dri2->base.version >= 3) { unsigned error; unsigned num_attribs = 8; @@ -1119,11 +1173,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, { struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); - struct dri2_egl_surface *dri2_dsurf = dri2_egl_surface(dsurf); - struct dri2_egl_surface *dri2_rsurf = dri2_egl_surface(rsurf); struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); _EGLContext *old_ctx; _EGLSurface *old_dsurf, *old_rsurf; + _EGLSurface *tmp_dsurf, *tmp_rsurf; __DRIdrawable *ddraw, *rdraw; __DRIcontext *cctx; @@ -1135,8 +1188,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, if (old_ctx && dri2_drv->glFlush) dri2_drv->glFlush(); - ddraw = (dri2_dsurf) ? dri2_dsurf->dri_drawable : NULL; - rdraw = (dri2_rsurf) ? dri2_rsurf->dri_drawable : NULL; + ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL; + rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL; cctx = (dri2_ctx) ? 
dri2_ctx->dri_context : NULL; if (old_ctx) { @@ -1156,10 +1209,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, return EGL_TRUE; } else { /* undo the previous _eglBindContext */ - _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &dsurf, &rsurf); + _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, &tmp_rsurf); assert(&dri2_ctx->base == ctx && - &dri2_dsurf->base == dsurf && - &dri2_rsurf->base == rsurf); + tmp_dsurf == dsurf && + tmp_rsurf == rsurf); _eglPutSurface(dsurf); _eglPutSurface(rsurf); @@ -1173,6 +1226,14 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, } } +__DRIdrawable * +dri2_surface_get_dri_drawable(_EGLSurface *surf) +{ + struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); + + return dri2_surf->dri_drawable; +} + /* * Called from eglGetProcAddress() via drv->API.GetProcAddress(). */ @@ -1235,7 +1296,7 @@ void dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); - struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw); + __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(draw); if (dri2_dpy->flush) { if (dri2_dpy->flush->base.version >= 4) { @@ -1253,12 +1314,12 @@ dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw) * after calling eglSwapBuffers." 
*/ dri2_dpy->flush->flush_with_flags(dri2_ctx->dri_context, - dri2_surf->dri_drawable, + dri_drawable, __DRI2_FLUSH_DRAWABLE | __DRI2_FLUSH_INVALIDATE_ANCILLARY, __DRI2_THROTTLE_SWAPBUFFER); } else { - dri2_dpy->flush->flush(dri2_surf->dri_drawable); + dri2_dpy->flush->flush(dri_drawable); } } } @@ -1315,7 +1376,8 @@ static EGLBoolean dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); - struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface); + _EGLSurface *surf = ctx->DrawSurface; + __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf); (void) drv; @@ -1323,7 +1385,7 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx) * we need to copy fake to real here.*/ if (dri2_dpy->flush != NULL) - dri2_dpy->flush->flush(dri2_surf->dri_drawable); + dri2_dpy->flush->flush(dri_drawable); return EGL_TRUE; } @@ -1346,10 +1408,10 @@ dri2_bind_tex_image(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); - struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct dri2_egl_context *dri2_ctx; _EGLContext *ctx; GLint format, target; + __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf); ctx = _eglGetCurrentContext(); dri2_ctx = dri2_egl_context(ctx); @@ -1357,7 +1419,7 @@ dri2_bind_tex_image(_EGLDriver *drv, if (!_eglBindTexImage(drv, disp, surf, buffer)) return EGL_FALSE; - switch (dri2_surf->base.TextureFormat) { + switch (surf->TextureFormat) { case EGL_TEXTURE_RGB: format = __DRI_TEXTURE_FORMAT_RGB; break; @@ -1369,7 +1431,7 @@ dri2_bind_tex_image(_EGLDriver *drv, format = __DRI_TEXTURE_FORMAT_RGBA; } - switch (dri2_surf->base.TextureTarget) { + switch (surf->TextureTarget) { case EGL_TEXTURE_2D: target = GL_TEXTURE_2D; break; @@ -1380,7 +1442,7 @@ dri2_bind_tex_image(_EGLDriver *drv, 
(*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context, target, format, - dri2_surf->dri_drawable); + dri_drawable); return EGL_TRUE; } @@ -1390,10 +1452,10 @@ dri2_release_tex_image(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); - struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct dri2_egl_context *dri2_ctx; _EGLContext *ctx; GLint target; + __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf); ctx = _eglGetCurrentContext(); dri2_ctx = dri2_egl_context(ctx); @@ -1401,7 +1463,7 @@ dri2_release_tex_image(_EGLDriver *drv, if (!_eglReleaseTexImage(drv, disp, surf, buffer)) return EGL_FALSE; - switch (dri2_surf->base.TextureTarget) { + switch (surf->TextureTarget) { case EGL_TEXTURE_2D: target = GL_TEXTURE_2D; break; @@ -1413,7 +1475,7 @@ dri2_release_tex_image(_EGLDriver *drv, dri2_dpy->tex_buffer->releaseTexBuffer != NULL) { (*dri2_dpy->tex_buffer->releaseTexBuffer)(dri2_ctx->dri_context, target, - dri2_surf->dri_drawable); + dri_drawable); } return EGL_TRUE; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 0e837b3eb8b..52ad92b182d 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -35,6 +35,10 @@ #include <xcb/dri2.h> #include <xcb/xfixes.h> #include <X11/Xlib-xcb.h> + +#ifdef HAVE_DRI3 +#include "loader_dri3_helper.h" +#endif #endif #ifdef HAVE_WAYLAND_PLATFORM @@ -145,6 +149,8 @@ struct dri2_egl_display_vtbl { EGLBoolean (*get_sync_values)(_EGLDisplay *display, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc); + + __DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf); }; struct dri2_egl_display @@ -158,6 +164,7 @@ struct dri2_egl_display const __DRIconfig **driver_configs; void *driver; const __DRIcoreExtension *core; + const __DRIimageDriverExtension *image_driver; const __DRIdri2Extension *dri2; const __DRIswrastExtension *swrast; const 
__DRI2flushExtension *flush; @@ -190,6 +197,9 @@ struct dri2_egl_display #ifdef HAVE_X11_PLATFORM xcb_connection_t *conn; int screen; +#ifdef HAVE_DRI3 + struct loader_dri3_extensions loader_dri3_ext; +#endif #endif #ifdef HAVE_WAYLAND_PLATFORM @@ -203,8 +213,9 @@ struct dri2_egl_display int formats; uint32_t capabilities; int is_render_node; - int is_different_gpu; #endif + + int is_different_gpu; }; struct dri2_egl_context @@ -325,8 +336,14 @@ EGLBoolean dri2_load_driver_swrast(_EGLDisplay *disp); EGLBoolean +dri2_load_driver_dri3(_EGLDisplay *disp); + +EGLBoolean dri2_create_screen(_EGLDisplay *disp); +__DRIdrawable * +dri2_surface_get_dri_drawable(_EGLSurface *surf); + __DRIimage * dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data); diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 4abe82f63a0..8f3abcb9867 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -650,6 +650,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = { .query_buffer_age = dri2_fallback_query_buffer_age, .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; EGLBoolean diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 815d2674cb2..3f4f7e78190 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -594,6 +594,7 @@ static struct dri2_egl_display_vtbl dri2_drm_display_vtbl = { .query_buffer_age = dri2_drm_query_buffer_age, .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; EGLBoolean diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index a635c758da1..c2438f7509b 100644 --- 
a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1025,6 +1025,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = { .query_buffer_age = dri2_wl_query_buffer_age, .create_wayland_buffer_from_image = dri2_wl_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; static EGLBoolean @@ -1752,6 +1753,7 @@ static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = { .query_buffer_age = dri2_fallback_query_buffer_age, .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; static EGLBoolean diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 88a06a8c6a8..08cbf2d8393 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -45,6 +45,10 @@ #include "egl_dri2_fallbacks.h" #include "loader.h" +#ifdef HAVE_DRI3 +#include "platform_x11_dri3.h" +#endif + static EGLBoolean dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint interval); @@ -703,7 +707,7 @@ dri2_x11_local_authenticate(_EGLDisplay *disp) static EGLBoolean dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, - _EGLDisplay *disp) + _EGLDisplay *disp, bool supports_preserved) { xcb_screen_iterator_t s; xcb_depth_iterator_t d; @@ -724,8 +728,10 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, surface_type = EGL_WINDOW_BIT | EGL_PIXMAP_BIT | - EGL_PBUFFER_BIT | - EGL_SWAP_BEHAVIOR_PRESERVED_BIT; + EGL_PBUFFER_BIT; + + if (supports_preserved) + surface_type |= EGL_SWAP_BEHAVIOR_PRESERVED_BIT; while (d.rem > 0) { EGLBoolean class_added[6] = { 0, }; @@ -1112,6 +1118,7 @@ static struct dri2_egl_display_vtbl dri2_x11_swrast_display_vtbl = { .query_buffer_age = dri2_fallback_query_buffer_age, 
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_fallback_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = { @@ -1130,6 +1137,7 @@ static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = { .query_buffer_age = dri2_fallback_query_buffer_age, .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, .get_sync_values = dri2_x11_get_sync_values, + .get_dri_drawable = dri2_surface_get_dri_drawable, }; static EGLBoolean @@ -1179,7 +1187,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) if (!dri2_create_screen(disp)) goto cleanup_driver; - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) goto cleanup_configs; /* Fill vtbl last to prevent accidentally calling virtual function during @@ -1250,6 +1258,100 @@ dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy) } } +#ifdef HAVE_DRI3 +static EGLBoolean +dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) +{ + struct dri2_egl_display *dri2_dpy; + + dri2_dpy = calloc(1, sizeof *dri2_dpy); + if (!dri2_dpy) + return _eglError(EGL_BAD_ALLOC, "eglInitialize"); + + disp->DriverData = (void *) dri2_dpy; + if (disp->PlatformDisplay == NULL) { + dri2_dpy->conn = xcb_connect(0, &dri2_dpy->screen); + dri2_dpy->own_device = true; + } else { + Display *dpy = disp->PlatformDisplay; + + dri2_dpy->conn = XGetXCBConnection(dpy); + dri2_dpy->screen = DefaultScreen(dpy); + } + + if (xcb_connection_has_error(dri2_dpy->conn)) { + _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed"); + goto cleanup_dpy; + } + + if (dri2_dpy->conn) { + if (!dri3_x11_connect(dri2_dpy)) + goto cleanup_conn; + } + + if (!dri2_load_driver_dri3(disp)) + goto cleanup_conn; + + dri2_dpy->extensions[0] = &dri3_image_loader_extension.base; + dri2_dpy->extensions[1] = &use_invalidate.base; + 
dri2_dpy->extensions[2] = &image_lookup_extension.base; + dri2_dpy->extensions[3] = NULL; + + dri2_dpy->swap_available = true; + dri2_dpy->invalidate_available = true; + + if (!dri2_create_screen(disp)) + goto cleanup_fd; + + dri2_x11_setup_swap_interval(dri2_dpy); + + if (!dri2_dpy->is_different_gpu) + disp->Extensions.KHR_image_pixmap = EGL_TRUE; + disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE; + disp->Extensions.CHROMIUM_sync_control = EGL_TRUE; + disp->Extensions.EXT_buffer_age = EGL_TRUE; + +#ifdef HAVE_WAYLAND_PLATFORM + disp->Extensions.WL_bind_wayland_display = EGL_TRUE; +#endif + + if (dri2_dpy->conn) { + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false)) + goto cleanup_configs; + } + + dri2_dpy->loader_dri3_ext.core = dri2_dpy->core; + dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver; + dri2_dpy->loader_dri3_ext.flush = dri2_dpy->flush; + dri2_dpy->loader_dri3_ext.tex_buffer = dri2_dpy->tex_buffer; + dri2_dpy->loader_dri3_ext.image = dri2_dpy->image; + dri2_dpy->loader_dri3_ext.config = dri2_dpy->config; + + /* Fill vtbl last to prevent accidentally calling virtual function during + * initialization. 
+ */ + dri2_dpy->vtbl = &dri3_x11_display_vtbl; + + _eglLog(_EGL_INFO, "Using DRI3"); + + return EGL_TRUE; + + cleanup_configs: + _eglCleanupDisplay(disp); + dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen); + dlclose(dri2_dpy->driver); + cleanup_fd: + close(dri2_dpy->fd); + cleanup_conn: + if (disp->PlatformDisplay == NULL) + xcb_disconnect(dri2_dpy->conn); + cleanup_dpy: + free(dri2_dpy); + + return EGL_FALSE; +} +#endif + static EGLBoolean dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) { @@ -1321,7 +1423,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) disp->Extensions.WL_bind_wayland_display = EGL_TRUE; #endif - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) goto cleanup_configs; /* Fill vtbl last to prevent accidentally calling virtual function during @@ -1329,6 +1431,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) */ dri2_dpy->vtbl = &dri2_x11_display_vtbl; + _eglLog(_EGL_INFO, "Using DRI2"); + return EGL_TRUE; cleanup_configs: @@ -1355,9 +1459,16 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp) int x11_dri2_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL); if (x11_dri2_accel) { - if (!dri2_initialize_x11_dri2(drv, disp)) { - initialized = dri2_initialize_x11_swrast(drv, disp); +#ifdef HAVE_DRI3 + if (getenv("LIBGL_DRI3_DISABLE") != NULL || + !dri2_initialize_x11_dri3(drv, disp)) { +#endif + if (!dri2_initialize_x11_dri2(drv, disp)) { + initialized = dri2_initialize_x11_swrast(drv, disp); + } +#ifdef HAVE_DRI3 } +#endif } else { initialized = dri2_initialize_x11_swrast(drv, disp); } diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c new file mode 100644 index 00000000000..8e4a131b11a --- /dev/null +++ b/src/egl/drivers/dri2/platform_x11_dri3.c @@ -0,0 +1,547 @@ +/* + * Copyright © 2015 Boyan Ding + * + * Permission to use, copy, modify, distribute, and sell this software and its + * 
documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +#include <xf86drm.h> + +#include "egl_dri2.h" +#include "egl_dri2_fallbacks.h" +#include "platform_x11_dri3.h" + +#include "loader.h" +#include "loader_dri3_helper.h" + +static struct dri3_egl_surface * +loader_drawable_to_egl_surface(struct loader_dri3_drawable *draw) { + size_t offset = offsetof(struct dri3_egl_surface, loader_drawable); + return (struct dri3_egl_surface *)(((void*) draw) - offset); +} + +static int +egl_dri3_get_swap_interval(struct loader_dri3_drawable *draw) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + + return dri3_surf->base.SwapInterval; +} + +static int +egl_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + + if (interval > dri3_surf->base.Config->MaxSwapInterval) + interval = dri3_surf->base.Config->MaxSwapInterval; + else if (interval < dri3_surf->base.Config->MinSwapInterval) + interval = dri3_surf->base.Config->MinSwapInterval; + + return interval; +} + +static void +egl_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + + dri3_surf->base.SwapInterval = interval; +} + +static void +egl_dri3_set_drawable_size(struct loader_dri3_drawable *draw, + int width, int height) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + + dri3_surf->base.Width = width; + dri3_surf->base.Height = height; +} + +static bool +egl_dri3_in_current_context(struct loader_dri3_drawable *draw) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + _EGLContext *ctx = _eglGetCurrentContext(); + + return ctx->Resource.Display == dri3_surf->base.Resource.Display; +} + +static __DRIcontext * 
+egl_dri3_get_dri_context(struct loader_dri3_drawable *draw) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + + return dri2_ctx->dri_context; +} + +static void +egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags) +{ + struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw); + _EGLDisplay *disp = dri3_surf->base.Resource.Display; + + dri2_flush_drawable_for_swapbuffers(disp, &dri3_surf->base); +} + +static struct loader_dri3_vtable egl_dri3_vtable = { + .get_swap_interval = egl_dri3_get_swap_interval, + .clamp_swap_interval = egl_dri3_clamp_swap_interval, + .set_swap_interval = egl_dri3_set_swap_interval, + .set_drawable_size = egl_dri3_set_drawable_size, + .in_current_context = egl_dri3_in_current_context, + .get_dri_context = egl_dri3_get_dri_context, + .flush_drawable = egl_dri3_flush_drawable, + .show_fps = NULL, +}; + +static EGLBoolean +dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + + (void) drv; + + if (!_eglPutSurface(surf)) + return EGL_TRUE; + + loader_dri3_drawable_fini(&dri3_surf->loader_drawable); + + free(surf); + + return EGL_TRUE; +} + +static EGLBoolean +dri3_set_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, + EGLint interval) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + + loader_dri3_set_swap_interval(&dri3_surf->loader_drawable, interval); + + return EGL_TRUE; +} + +static xcb_screen_t * +get_xcb_screen(xcb_screen_iterator_t iter, int screen) +{ + for (; iter.rem; --screen, xcb_screen_next(&iter)) + if (screen == 0) + return iter.data; + + return NULL; +} + +static _EGLSurface * +dri3_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, + _EGLConfig *conf, void *native_surface, + const EGLint *attrib_list) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_config 
*dri2_conf = dri2_egl_config(conf); + struct dri3_egl_surface *dri3_surf; + const __DRIconfig *dri_config; + xcb_drawable_t drawable; + xcb_screen_iterator_t s; + xcb_screen_t *screen; + + STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface)); + drawable = (uintptr_t) native_surface; + + (void) drv; + + dri3_surf = calloc(1, sizeof *dri3_surf); + if (!dri3_surf) { + _eglError(EGL_BAD_ALLOC, "dri3_create_surface"); + return NULL; + } + + if (!_eglInitSurface(&dri3_surf->base, disp, type, conf, attrib_list)) + goto cleanup_surf; + + if (type == EGL_PBUFFER_BIT) { + s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn)); + screen = get_xcb_screen(s, dri2_dpy->screen); + if (!screen) { + _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_create_surface"); + goto cleanup_surf; + } + + drawable = xcb_generate_id(dri2_dpy->conn); + xcb_create_pixmap(dri2_dpy->conn, conf->BufferSize, + drawable, screen->root, + dri3_surf->base.Width, dri3_surf->base.Height); + } + + dri_config = dri2_get_dri_config(dri2_conf, type, + dri3_surf->base.GLColorspace); + + if (loader_dri3_drawable_init(dri2_dpy->conn, drawable, + dri2_dpy->dri_screen, + dri2_dpy->is_different_gpu, dri_config, + &dri2_dpy->loader_dri3_ext, + &egl_dri3_vtable, + &dri3_surf->loader_drawable)) { + _eglError(EGL_BAD_ALLOC, "dri3_surface_create"); + goto cleanup_pixmap; + } + + return &dri3_surf->base; + + cleanup_pixmap: + if (type == EGL_PBUFFER_BIT) + xcb_free_pixmap(dri2_dpy->conn, drawable); + cleanup_surf: + free(dri3_surf); + + return NULL; +} + +/** + * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
+ */ +static _EGLSurface * +dri3_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, void *native_window, + const EGLint *attrib_list) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + _EGLSurface *surf; + + surf = dri3_create_surface(drv, disp, EGL_WINDOW_BIT, conf, + native_window, attrib_list); + if (surf != NULL) + dri3_set_swap_interval(drv, disp, surf, dri2_dpy->default_swap_interval); + + return surf; +} + +static _EGLSurface * +dri3_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, void *native_pixmap, + const EGLint *attrib_list) +{ + return dri3_create_surface(drv, disp, EGL_PIXMAP_BIT, conf, + native_pixmap, attrib_list); +} + +static _EGLSurface * +dri3_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp, + _EGLConfig *conf, const EGLint *attrib_list) +{ + return dri3_create_surface(drv, disp, EGL_PBUFFER_BIT, conf, + XCB_WINDOW_NONE, attrib_list); +} + +static EGLBoolean +dri3_get_sync_values(_EGLDisplay *display, _EGLSurface *surface, + EGLuint64KHR *ust, EGLuint64KHR *msc, + EGLuint64KHR *sbc) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surface); + + return loader_dri3_wait_for_msc(&dri3_surf->loader_drawable, 0, 0, 0, + (int64_t *) ust, (int64_t *) msc, + (int64_t *) sbc) ? 
EGL_TRUE : EGL_FALSE; +} + +static _EGLImage * +dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx, + EGLClientBuffer buffer, const EGLint *attr_list) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_image *dri2_img; + xcb_drawable_t drawable; + xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; + xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; + unsigned int format; + + drawable = (xcb_drawable_t) (uintptr_t) buffer; + bp_cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, drawable); + bp_reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn, + bp_cookie, NULL); + if (!bp_reply) { + _eglError(EGL_BAD_ALLOC, "xcb_dri3_buffer_from_pixmap"); + return NULL; + } + + switch (bp_reply->depth) { + case 16: + format = __DRI_IMAGE_FORMAT_RGB565; + break; + case 24: + format = __DRI_IMAGE_FORMAT_XRGB8888; + break; + case 32: + format = __DRI_IMAGE_FORMAT_ARGB8888; + break; + default: + _eglError(EGL_BAD_PARAMETER, + "dri3_create_image_khr: unsupported pixmap depth"); + free(bp_reply); + return EGL_NO_IMAGE_KHR; + } + + dri2_img = malloc(sizeof *dri2_img); + if (!dri2_img) { + _eglError(EGL_BAD_ALLOC, "dri3_create_image_khr"); + return EGL_NO_IMAGE_KHR; + } + + if (!_eglInitImage(&dri2_img->base, disp)) { + free(dri2_img); + return EGL_NO_IMAGE_KHR; + } + + dri2_img->dri_image = loader_dri3_create_image(dri2_dpy->conn, + bp_reply, + format, + dri2_dpy->dri_screen, + dri2_dpy->image, + dri2_img); + + free(bp_reply); + + return &dri2_img->base; +} + +static _EGLImage * +dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, + _EGLContext *ctx, EGLenum target, + EGLClientBuffer buffer, const EGLint *attr_list) +{ + (void) drv; + + switch (target) { + case EGL_NATIVE_PIXMAP_KHR: + return dri3_create_image_khr_pixmap(disp, ctx, buffer, attr_list); + default: + return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list); + } +} + +/** + * Called by the driver when it needs to update the real front buffer with the + * 
contents of its fake front buffer. + */ +static void +dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) +{ + /* There does not seem to be any kind of consensus on whether we should + * support front-buffer rendering or not: + * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html + */ + _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering."); + (void) driDrawable; + (void) loaderPrivate; +} + +const __DRIimageLoaderExtension dri3_image_loader_extension = { + .base = { __DRI_IMAGE_LOADER, 1 }, + + .getBuffers = loader_dri3_get_buffers, + .flushFrontBuffer = dri3_flush_front_buffer, +}; + +static EGLBoolean +dri3_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(draw); + + /* No-op for a pixmap or pbuffer surface */ + if (draw->Type == EGL_PIXMAP_BIT || draw->Type == EGL_PBUFFER_BIT) + return 0; + + return loader_dri3_swap_buffers_msc(&dri3_surf->loader_drawable, + 0, 0, 0, 0, + draw->SwapBehavior == EGL_BUFFER_PRESERVED) != -1; +} + +static EGLBoolean +dri3_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, + void *native_pixmap_target) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + xcb_pixmap_t target; + + STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_pixmap_target)); + target = (uintptr_t) native_pixmap_target; + + loader_dri3_copy_drawable(&dri3_surf->loader_drawable, target, + dri3_surf->loader_drawable.drawable); + + return EGL_TRUE; +} + +static int +dri3_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + + return loader_dri3_query_buffer_age(&dri3_surf->loader_drawable); +} + +static __DRIdrawable * +dri3_get_dri_drawable(_EGLSurface *surf) +{ + struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + + return dri3_surf->loader_drawable.dri_drawable; +} + +struct dri2_egl_display_vtbl 
dri3_x11_display_vtbl = { + .authenticate = NULL, + .create_window_surface = dri3_create_window_surface, + .create_pixmap_surface = dri3_create_pixmap_surface, + .create_pbuffer_surface = dri3_create_pbuffer_surface, + .destroy_surface = dri3_destroy_surface, + .create_image = dri3_create_image_khr, + .swap_interval = dri3_set_swap_interval, + .swap_buffers = dri3_swap_buffers, + .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage, + .swap_buffers_region = dri2_fallback_swap_buffers_region, + .post_sub_buffer = dri2_fallback_post_sub_buffer, + .copy_buffers = dri3_copy_buffers, + .query_buffer_age = dri3_query_buffer_age, + .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image, + .get_sync_values = dri3_get_sync_values, + .get_dri_drawable = dri3_get_dri_drawable, +}; + +static char * +dri3_get_device_name(int fd) +{ + char *ret = NULL; + + ret = drmGetRenderDeviceNameFromFd(fd); + if (ret) + return ret; + + /* For dri3, render node support is required for WL_bind_wayland_display. + * In order not to regress on older systems without kernel or libdrm + * support, fall back to dri2. User can override it with environment + * variable if they don't need to use that extension. 
+ */ + if (getenv("EGL_FORCE_DRI3") == NULL) { + _eglLog(_EGL_WARNING, "Render node support not available, falling back to dri2"); + _eglLog(_EGL_WARNING, "If you want to force dri3, set EGL_FORCE_DRI3 environment variable"); + } else + ret = loader_get_device_name_for_fd(fd); + + return ret; +} + +EGLBoolean +dri3_x11_connect(struct dri2_egl_display *dri2_dpy) +{ + xcb_dri3_query_version_reply_t *dri3_query; + xcb_dri3_query_version_cookie_t dri3_query_cookie; + xcb_present_query_version_reply_t *present_query; + xcb_present_query_version_cookie_t present_query_cookie; + xcb_generic_error_t *error; + xcb_screen_iterator_t s; + xcb_screen_t *screen; + const xcb_query_extension_reply_t *extension; + + xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_dri3_id); + xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_present_id); + + extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_dri3_id); + if (!(extension && extension->present)) + return EGL_FALSE; + + extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_present_id); + if (!(extension && extension->present)) + return EGL_FALSE; + + dri3_query_cookie = xcb_dri3_query_version(dri2_dpy->conn, + XCB_DRI3_MAJOR_VERSION, + XCB_DRI3_MINOR_VERSION); + + present_query_cookie = xcb_present_query_version(dri2_dpy->conn, + XCB_PRESENT_MAJOR_VERSION, + XCB_PRESENT_MINOR_VERSION); + + dri3_query = + xcb_dri3_query_version_reply(dri2_dpy->conn, dri3_query_cookie, &error); + if (dri3_query == NULL || error != NULL) { + _eglLog(_EGL_WARNING, "DRI3: failed to query the version"); + free(dri3_query); + free(error); + return EGL_FALSE; + } + free(dri3_query); + + present_query = + xcb_present_query_version_reply(dri2_dpy->conn, + present_query_cookie, &error); + if (present_query == NULL || error != NULL) { + _eglLog(_EGL_WARNING, "DRI3: failed to query Present version"); + free(present_query); + free(error); + return EGL_FALSE; + } + free(present_query); + + s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn)); + screen 
= get_xcb_screen(s, dri2_dpy->screen); + if (!screen) { + _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_x11_connect"); + return EGL_FALSE; + } + + dri2_dpy->fd = loader_dri3_open(dri2_dpy->conn, screen->root, 0); + if (dri2_dpy->fd < 0) { + int conn_error = xcb_connection_has_error(dri2_dpy->conn); + _eglLog(_EGL_WARNING, "DRI3: Screen seems not DRI3 capable"); + + if (conn_error) + _eglLog(_EGL_WARNING, "DRI3: Failed to initialize"); + + return EGL_FALSE; + } + + dri2_dpy->fd = loader_get_user_preferred_fd(dri2_dpy->fd, &dri2_dpy->is_different_gpu); + + dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0); + if (!dri2_dpy->driver_name) { + _eglLog(_EGL_WARNING, "DRI3: No driver found"); + close(dri2_dpy->fd); + return EGL_FALSE; + } + + dri2_dpy->device_name = dri3_get_device_name(dri2_dpy->fd); + if (!dri2_dpy->device_name) { + close(dri2_dpy->fd); + return EGL_FALSE; + } + + return EGL_TRUE; +} diff --git a/src/egl/drivers/dri2/platform_x11_dri3.h b/src/egl/drivers/dri2/platform_x11_dri3.h new file mode 100644 index 00000000000..13d85724288 --- /dev/null +++ b/src/egl/drivers/dri2/platform_x11_dri3.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2015 Boyan Ding + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#ifndef EGL_X11_DRI3_INCLUDED +#define EGL_X11_DRI3_INCLUDED + +#include "egl_dri2.h" + +_EGL_DRIVER_TYPECAST(dri3_egl_surface, _EGLSurface, obj) + +struct dri3_egl_surface { + _EGLSurface base; + struct loader_dri3_drawable loader_drawable; +}; + +extern const __DRIimageLoaderExtension dri3_image_loader_extension; +extern struct dri2_egl_display_vtbl dri3_x11_display_vtbl; + +EGLBoolean +dri3_x11_connect(struct dri2_egl_display *dri2_dpy); + +#endif diff --git a/src/egl/egl-symbols-check b/src/egl/egl-symbols-check new file mode 100755 index 00000000000..5d46fed57c9 --- /dev/null +++ b/src/egl/egl-symbols-check @@ -0,0 +1,55 @@ +#!/bin/bash + +FUNCS=$(nm -D --defined-only ${1-.libs/libEGL.so} | grep -o "T .*" | cut -c 3- | while read func; do +( grep -q "^$func$" || echo $func ) <<EOF +eglBindAPI +eglBindTexImage +eglChooseConfig +eglClientWaitSync +eglCopyBuffers +eglCreateContext +eglCreateImage +eglCreatePbufferFromClientBuffer +eglCreatePbufferSurface +eglCreatePixmapSurface +eglCreatePlatformPixmapSurface +eglCreatePlatformWindowSurface +eglCreateSync +eglCreateWindowSurface +eglDestroyContext +eglDestroyImage +eglDestroySurface +eglDestroySync +eglGetConfigAttrib +eglGetConfigs +eglGetCurrentContext +eglGetCurrentDisplay +eglGetCurrentSurface +eglGetDisplay +eglGetError +eglGetPlatformDisplay +eglGetProcAddress +eglGetSyncAttrib +eglInitialize +eglMakeCurrent +eglQueryAPI +eglQueryContext +eglQueryString +eglQuerySurface +eglReleaseTexImage +eglReleaseThread 
+eglSurfaceAttrib +eglSwapBuffers +eglSwapInterval +eglTerminate +eglWaitClient +eglWaitGL +eglWaitNative +eglWaitSync +_fini +_init +EOF +done) + +test ! -n "$FUNCS" || echo $FUNCS +test ! -n "$FUNCS" diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk index 39e064e9538..b406d4a5480 100644 --- a/src/gallium/Android.mk +++ b/src/gallium/Android.mk @@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir) GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk SUBDIRS := auxiliary +SUBDIRS += auxiliary/pipe-loader # # Gallium drivers and their respective winsys diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc index ee07ab6c8f9..6fe2e22fecf 100644 --- a/src/gallium/Automake.inc +++ b/src/gallium/Automake.inc @@ -67,3 +67,9 @@ if HAVE_DRISW GALLIUM_PIPE_LOADER_WINSYS_LIBS += \ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la endif + +if HAVE_DRISW_KMS +GALLIUM_PIPE_LOADER_WINSYS_LIBS += \ + $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ + $(LIBDRM_LIBS) +endif diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index 611d55fafe2..e42a8f17703 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = ## SUBDIRS += auxiliary +SUBDIRS += auxiliary/pipe-loader ## ## Gallium pipe drivers and their respective winsys' @@ -98,7 +99,7 @@ if HAVE_DRISW SUBDIRS += winsys/sw/dri endif -if HAVE_DRI2 +if HAVE_DRISW_KMS SUBDIRS += winsys/sw/kms-dri endif @@ -120,7 +121,8 @@ EXTRA_DIST = \ ## Gallium state trackers and their users (targets) ## -if HAVE_LOADER_GALLIUM +## XXX: Rename the conditional once we have a config switch for static/dynamic pipe-drivers +if HAVE_CLOVER SUBDIRS += targets/pipe-loader endif diff --git a/src/gallium/SConscript b/src/gallium/SConscript index fa5fa6e8734..0c3a3742c16 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -5,6 +5,7 @@ Import('env') # SConscript('auxiliary/SConscript') +SConscript('auxiliary/pipe-loader/SConscript') # # Drivers diff --git 
a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am index a728162bd9d..ee296ceda33 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -1,7 +1,3 @@ -if HAVE_LOADER_GALLIUM -SUBDIRS := pipe-loader -endif - include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc @@ -66,15 +62,7 @@ COMMON_VL_CFLAGS = \ $(AM_CFLAGS) \ $(VL_CFLAGS) \ $(DRI2PROTO_CFLAGS) \ - $(LIBDRM_CFLAGS) \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" - -if HAVE_GALLIUM_STATIC_TARGETS -COMMON_VL_CFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 - -endif # HAVE_GALLIUM_STATIC_TARGETS + $(LIBDRM_CFLAGS) noinst_LTLIBRARIES += libgalliumvl.la diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 82ef5ecfce4..61601920a94 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -219,8 +219,6 @@ C_SOURCES := \ util/u_format.h \ util/u_format_etc.c \ util/u_format_etc.h \ - util/u_format_fake.c \ - util/u_format_fake.h \ util/u_format_latc.c \ util/u_format_latc.h \ util/u_format_other.c \ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda1184ee9..3ee708f4fad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? 
"+altivec" : "-altivec"); +#if HAVE_LLVM >= 0x0304 + /* + * Make sure VSX instructions are disabled + * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 + */ + if (util_cpu_caps.has_altivec) { + MAttrs.push_back("-vsx"); + } +#endif #endif builder.setMAttrs(MAttrs); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 7d2cd9a9e73..28c7a86316e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2608,7 +2608,12 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, params.type = bld->bld_base.base.type; params.sample_key = sample_key; params.texture_index = unit; - params.sampler_index = unit; + /* + * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS + * and trigger some assertions with d3d10 where the sampler view number + * can exceed this. + */ + params.sampler_index = 0; params.context_ptr = bld->context_ptr; params.thread_data_ptr = bld->thread_data_ptr; params.coords = coords; diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index ffe30b8fa79..efceb85e38d 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -33,6 +33,7 @@ * Set GALLIUM_HUD=help for more info. 
*/ +#include <signal.h> #include <stdio.h> #include "hud/hud_context.h" @@ -51,12 +52,15 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" +/* Control the visibility of all HUD contexts */ +static boolean huds_visible = TRUE; struct hud_context { struct pipe_context *pipe; struct cso_context *cso; struct u_upload_mgr *uploader; + struct hud_batch_query_context *batch_query; struct list_head pane_list; /* states */ @@ -95,6 +99,13 @@ struct hud_context { } text, bg, whitelines; }; +#ifdef PIPE_OS_UNIX +static void +signal_visible_handler(int sig, siginfo_t *siginfo, void *context) +{ + huds_visible = !huds_visible; +} +#endif static void hud_draw_colored_prims(struct hud_context *hud, unsigned prim, @@ -441,6 +452,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) struct hud_pane *pane; struct hud_graph *gr; + if (!huds_visible) + return; + hud->fb_width = tex->width0; hud->fb_height = tex->height0; hud->constants.two_div_fb_width = 2.0f / hud->fb_width; @@ -510,6 +524,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float)); /* prepare all graphs */ + hud_batch_query_update(hud->batch_query); + LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { gr->query_new_value(gr); @@ -903,17 +919,21 @@ hud_parse_env_var(struct hud_context *hud, const char *env) } else if (strcmp(name, "samples-passed") == 0 && has_occlusion_query(hud->pipe->screen)) { - hud_pipe_query_install(pane, hud->pipe, "samples-passed", + hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, + "samples-passed", PIPE_QUERY_OCCLUSION_COUNTER, 0, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); } else if (strcmp(name, "primitives-generated") == 0 && has_streamout(hud->pipe->screen)) { - hud_pipe_query_install(pane, hud->pipe, "primitives-generated", + 
hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, + "primitives-generated", PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); } else { boolean processed = FALSE; @@ -938,17 +958,19 @@ hud_parse_env_var(struct hud_context *hud, const char *env) if (strcmp(name, pipeline_statistics_names[i]) == 0) break; if (i < Elements(pipeline_statistics_names)) { - hud_pipe_query_install(pane, hud->pipe, name, + hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name, PIPE_QUERY_PIPELINE_STATISTICS, i, 0, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE); + PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, + 0); processed = TRUE; } } /* driver queries */ if (!processed) { - if (!hud_driver_query_install(pane, hud->pipe, name)){ + if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe, + name)) { fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", name); } } @@ -1125,6 +1147,12 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) struct pipe_sampler_view view_templ; unsigned i; const char *env = debug_get_option("GALLIUM_HUD", NULL); + unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0); +#ifdef PIPE_OS_UNIX + static boolean sig_handled = FALSE; + struct sigaction action = {}; +#endif + huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE); if (!env || !*env) return NULL; @@ -1267,6 +1295,22 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) LIST_INITHEAD(&hud->pane_list); + /* setup sig handler once for all hud contexts */ +#ifdef PIPE_OS_UNIX + if (!sig_handled && signo != 0) { + action.sa_sigaction = &signal_visible_handler; + action.sa_flags = SA_SIGINFO; + + if (signo >= NSIG) + fprintf(stderr, "gallium_hud: invalid signal %u\n", signo); + else if (sigaction(signo, &action, NULL) < 0) + fprintf(stderr, "gallium_hud: unable to set handler for signal %u\n", signo); + 
fflush(stderr); + + sig_handled = TRUE; + } +#endif + hud_parse_env_var(hud, env); return hud; } @@ -1287,6 +1331,7 @@ hud_destroy(struct hud_context *hud) FREE(pane); } + hud_batch_query_cleanup(&hud->batch_query); pipe->delete_fs_state(pipe, hud->fs_color); pipe->delete_fs_state(pipe, hud->fs_text); pipe->delete_vs_state(pipe, hud->vs); diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c index f14305ea835..d7b1f11ed56 100644 --- a/src/gallium/auxiliary/hud/hud_driver_query.c +++ b/src/gallium/auxiliary/hud/hud_driver_query.c @@ -34,13 +34,164 @@ #include "hud/hud_private.h" #include "pipe/p_screen.h" #include "os/os_time.h" +#include "util/u_math.h" #include "util/u_memory.h" #include <stdio.h> +// Must be a power of two #define NUM_QUERIES 8 +struct hud_batch_query_context { + struct pipe_context *pipe; + unsigned num_query_types; + unsigned allocated_query_types; + unsigned *query_types; + + boolean failed; + struct pipe_query *query[NUM_QUERIES]; + union pipe_query_result *result[NUM_QUERIES]; + unsigned head, pending, results; +}; + +void +hud_batch_query_update(struct hud_batch_query_context *bq) +{ + struct pipe_context *pipe; + + if (!bq || bq->failed) + return; + + pipe = bq->pipe; + + if (bq->query[bq->head]) + pipe->end_query(pipe, bq->query[bq->head]); + + bq->results = 0; + + while (bq->pending) { + unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES; + struct pipe_query *query = bq->query[idx]; + + if (!bq->result[idx]) + bq->result[idx] = MALLOC(sizeof(bq->result[idx]->batch[0]) * + bq->num_query_types); + if (!bq->result[idx]) { + fprintf(stderr, "gallium_hud: out of memory.\n"); + bq->failed = TRUE; + return; + } + + if (!pipe->get_query_result(pipe, query, FALSE, bq->result[idx])) + break; + + ++bq->results; + --bq->pending; + } + + bq->head = (bq->head + 1) % NUM_QUERIES; + + if (bq->pending == NUM_QUERIES) { + fprintf(stderr, + "gallium_hud: all queries busy after %i frames, dropping 
data.\n", + NUM_QUERIES); + + assert(bq->query[bq->head]); + + pipe->destroy_query(bq->pipe, bq->query[bq->head]); + bq->query[bq->head] = NULL; + } + + ++bq->pending; + + if (!bq->query[bq->head]) { + bq->query[bq->head] = pipe->create_batch_query(pipe, + bq->num_query_types, + bq->query_types); + + if (!bq->query[bq->head]) { + fprintf(stderr, + "gallium_hud: create_batch_query failed. You may have " + "selected too many or incompatible queries.\n"); + bq->failed = TRUE; + return; + } + } + + if (!pipe->begin_query(pipe, bq->query[bq->head])) { + fprintf(stderr, + "gallium_hud: could not begin batch query. You may have " + "selected too many or incompatible queries.\n"); + bq->failed = TRUE; + } +} + +static boolean +batch_query_add(struct hud_batch_query_context **pbq, + struct pipe_context *pipe, unsigned query_type, + unsigned *result_index) +{ + struct hud_batch_query_context *bq = *pbq; + unsigned i; + + if (!bq) { + bq = CALLOC_STRUCT(hud_batch_query_context); + if (!bq) + return false; + bq->pipe = pipe; + *pbq = bq; + } + + for (i = 0; i < bq->num_query_types; ++i) { + if (bq->query_types[i] == query_type) { + *result_index = i; + return true; + } + } + + if (bq->num_query_types == bq->allocated_query_types) { + unsigned new_alloc = MAX2(16, bq->allocated_query_types * 2); + unsigned *new_query_types + = REALLOC(bq->query_types, + bq->allocated_query_types * sizeof(unsigned), + new_alloc * sizeof(unsigned)); + if (!new_query_types) + return false; + bq->query_types = new_query_types; + bq->allocated_query_types = new_alloc; + } + + bq->query_types[bq->num_query_types] = query_type; + *result_index = bq->num_query_types++; + return true; +} + +void +hud_batch_query_cleanup(struct hud_batch_query_context **pbq) +{ + struct hud_batch_query_context *bq = *pbq; + unsigned idx; + + if (!bq) + return; + + *pbq = NULL; + + if (bq->query[bq->head] && !bq->failed) + bq->pipe->end_query(bq->pipe, bq->query[bq->head]); + + for (idx = 0; idx < NUM_QUERIES; ++idx) { + 
if (bq->query[idx]) + bq->pipe->destroy_query(bq->pipe, bq->query[idx]); + FREE(bq->result[idx]); + } + + FREE(bq->query_types); + FREE(bq); +} + struct query_info { struct pipe_context *pipe; + struct hud_batch_query_context *batch; unsigned query_type; unsigned result_index; /* unit depends on query_type */ enum pipe_driver_query_result_type result_type; @@ -48,7 +199,6 @@ struct query_info { /* Ring of queries. If a query is busy, we use another slot. */ struct pipe_query *query[NUM_QUERIES]; unsigned head, tail; - unsigned num_queries; uint64_t last_time; uint64_t results_cumulative; @@ -56,11 +206,26 @@ struct query_info { }; static void -query_new_value(struct hud_graph *gr) +query_new_value_batch(struct query_info *info) +{ + struct hud_batch_query_context *bq = info->batch; + unsigned result_index = info->result_index; + unsigned idx = (bq->head - bq->pending) % NUM_QUERIES; + unsigned results = bq->results; + + while (results) { + info->results_cumulative += bq->result[idx]->batch[result_index].u64; + ++info->num_results; + + --results; + idx = (idx - 1) % NUM_QUERIES; + } +} + +static void +query_new_value_normal(struct query_info *info) { - struct query_info *info = gr->query_data; struct pipe_context *pipe = info->pipe; - uint64_t now = os_time_get(); if (info->last_time) { if (info->query[info->head]) @@ -107,30 +272,9 @@ query_new_value(struct hud_graph *gr) break; } } - - if (info->num_results && info->last_time + gr->pane->period <= now) { - uint64_t value; - - switch (info->result_type) { - default: - case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE: - value = info->results_cumulative / info->num_results; - break; - case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE: - value = info->results_cumulative; - break; - } - - hud_graph_add_value(gr, value); - - info->last_time = now; - info->results_cumulative = 0; - info->num_results = 0; - } } else { /* initialize */ - info->last_time = now; info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); 
} @@ -139,11 +283,49 @@ query_new_value(struct hud_graph *gr) } static void +query_new_value(struct hud_graph *gr) +{ + struct query_info *info = gr->query_data; + uint64_t now = os_time_get(); + + if (info->batch) { + query_new_value_batch(info); + } else { + query_new_value_normal(info); + } + + if (!info->last_time) { + info->last_time = now; + return; + } + + if (info->num_results && info->last_time + gr->pane->period <= now) { + uint64_t value; + + switch (info->result_type) { + default: + case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE: + value = info->results_cumulative / info->num_results; + break; + case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE: + value = info->results_cumulative; + break; + } + + hud_graph_add_value(gr, value); + + info->last_time = now; + info->results_cumulative = 0; + info->num_results = 0; + } +} + +static void free_query_info(void *ptr) { struct query_info *info = ptr; - if (info->last_time) { + if (!info->batch && info->last_time) { struct pipe_context *pipe = info->pipe; int i; @@ -159,11 +341,13 @@ free_query_info(void *ptr) } void -hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, +hud_pipe_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name, unsigned query_type, unsigned result_index, uint64_t max_value, enum pipe_driver_query_type type, - enum pipe_driver_query_result_type result_type) + enum pipe_driver_query_result_type result_type, + unsigned flags) { struct hud_graph *gr; struct query_info *info; @@ -175,28 +359,40 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, strncpy(gr->name, name, sizeof(gr->name)); gr->name[sizeof(gr->name) - 1] = '\0'; gr->query_data = CALLOC_STRUCT(query_info); - if (!gr->query_data) { - FREE(gr); - return; - } + if (!gr->query_data) + goto fail_gr; gr->query_new_value = query_new_value; gr->free_query_data = free_query_info; info = gr->query_data; info->pipe = pipe; - info->query_type 
= query_type; - info->result_index = result_index; info->result_type = result_type; + if (flags & PIPE_DRIVER_QUERY_FLAG_BATCH) { + if (!batch_query_add(pbq, pipe, query_type, &info->result_index)) + goto fail_info; + info->batch = *pbq; + } else { + info->query_type = query_type; + info->result_index = result_index; + } + hud_pane_add_graph(pane, gr); if (pane->max_value < max_value) hud_pane_set_max_value(pane, max_value); pane->type = type; + return; + +fail_info: + FREE(info); +fail_gr: + FREE(gr); } boolean -hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe, +hud_driver_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name) { struct pipe_screen *screen = pipe->screen; @@ -220,8 +416,9 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe, if (!found) return FALSE; - hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0, - query.max_value.u64, query.type, query.result_type); + hud_pipe_query_install(pbq, pane, pipe, query.name, query.query_type, 0, + query.max_value.u64, query.type, query.result_type, + query.flags); return TRUE; } diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h index 01caf7b8b2c..4a788bba456 100644 --- a/src/gallium/auxiliary/hud/hud_private.h +++ b/src/gallium/auxiliary/hud/hud_private.h @@ -80,19 +80,26 @@ void hud_pane_set_max_value(struct hud_pane *pane, uint64_t value); void hud_graph_add_value(struct hud_graph *gr, uint64_t value); /* graphs/queries */ +struct hud_batch_query_context; + #define ALL_CPUS ~0 /* optionally set as cpu_index */ int hud_get_num_cpus(void); void hud_fps_graph_install(struct hud_pane *pane); void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index); -void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe, +void hud_pipe_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct 
pipe_context *pipe, const char *name, unsigned query_type, unsigned result_index, uint64_t max_value, enum pipe_driver_query_type type, - enum pipe_driver_query_result_type result_type); -boolean hud_driver_query_install(struct hud_pane *pane, + enum pipe_driver_query_result_type result_type, + unsigned flags); +boolean hud_driver_query_install(struct hud_batch_query_context **pbq, + struct hud_pane *pane, struct pipe_context *pipe, const char *name); +void hud_batch_query_update(struct hud_batch_query_context *bq); +void hud_batch_query_cleanup(struct hud_batch_query_context **pbq); #endif diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0539cfc16a1..86c2ffadbc8 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -295,7 +295,7 @@ ttn_emit_declaration(struct ttn_compile *c) type = nir_type_int; break; case TGSI_RETURN_TYPE_UINT: - type = nir_type_unsigned; + type = nir_type_uint; break; case TGSI_RETURN_TYPE_FLOAT: default: @@ -1239,6 +1239,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) op = nir_texop_tex; num_srcs = 1; break; + case TGSI_OPCODE_TEX2: + op = nir_texop_tex; + num_srcs = 1; + samp = 2; + break; case TGSI_OPCODE_TXP: op = nir_texop_tex; num_srcs = 2; @@ -1275,6 +1280,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) num_srcs = 3; samp = 3; break; + case TGSI_OPCODE_LODQ: + op = nir_texop_lod; + num_srcs = 1; + break; default: fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode); @@ -1327,7 +1336,9 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) */ sview = instr->sampler_index; - if (sview < c->num_samp_types) { + if (op == nir_texop_lod) { + instr->dest_type = nir_type_float; + } else if (sview < c->num_samp_types) { instr->dest_type = c->samp_types[sview]; } else { instr->dest_type = nir_type_float; @@ -1394,10 +1405,12 @@ ttn_tex(struct ttn_compile *c, 
nir_alu_dest dest, nir_ssa_def **src) } if (instr->is_shadow) { - if (instr->coord_components < 3) - instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); - else + if (instr->coord_components == 4) + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X)); + else if (instr->coord_components == 3) instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + else + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); instr->src[src_number].src_type = nir_tex_src_comparitor; src_number++; @@ -1641,7 +1654,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high, [TGSI_OPCODE_TG4] = 0, - [TGSI_OPCODE_LODQ] = 0, /* XXX */ + [TGSI_OPCODE_LODQ] = 0, [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract, [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract, @@ -1650,7 +1663,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_POPC] = nir_op_bit_count, [TGSI_OPCODE_LSB] = nir_op_find_lsb, [TGSI_OPCODE_IMSB] = nir_op_ifind_msb, - [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */ + [TGSI_OPCODE_UMSB] = nir_op_ufind_msb, [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */ [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */ @@ -1803,11 +1816,13 @@ ttn_emit_instruction(struct ttn_compile *c) case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TEX2: case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXQ_LZ: case TGSI_OPCODE_TXF: case TGSI_OPCODE_TG4: + case TGSI_OPCODE_LODQ: ttn_tex(c, dest, src); break; diff --git a/src/gallium/auxiliary/os/os_process.c b/src/gallium/auxiliary/os/os_process.c index a6262283d87..d2dcd0d7fbc 100644 --- a/src/gallium/auxiliary/os/os_process.c +++ b/src/gallium/auxiliary/os/os_process.c @@ -54,37 +54,48 @@ boolean os_get_process_name(char *procname, size_t size) { const char *name; + + /* First, check if the GALLIUM_PROCESS_NAME env var is set to + * override the normal process name query. 
+ */ + name = os_get_option("GALLIUM_PROCESS_NAME"); + + if (!name) { + /* do normal query */ + #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) - char szProcessPath[MAX_PATH]; - char *lpProcessName; - char *lpProcessExt; + char szProcessPath[MAX_PATH]; + char *lpProcessName; + char *lpProcessExt; - GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath)); + GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath)); - lpProcessName = strrchr(szProcessPath, '\\'); - lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; + lpProcessName = strrchr(szProcessPath, '\\'); + lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; - lpProcessExt = strrchr(lpProcessName, '.'); - if (lpProcessExt) { - *lpProcessExt = '\0'; - } + lpProcessExt = strrchr(lpProcessName, '.'); + if (lpProcessExt) { + *lpProcessExt = '\0'; + } - name = lpProcessName; + name = lpProcessName; #elif defined(__GLIBC__) || defined(__CYGWIN__) - name = program_invocation_short_name; + name = program_invocation_short_name; #elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE) - /* *BSD and OS X */ - name = getprogname(); + /* *BSD and OS X */ + name = getprogname(); #elif defined(PIPE_OS_HAIKU) - image_info info; - get_image_info(B_CURRENT_TEAM, &info); - name = info.name; + image_info info; + get_image_info(B_CURRENT_TEAM, &info); + name = info.name; #else #warning unexpected platform in os_process.c - return FALSE; + return FALSE; #endif + } + assert(size > 0); assert(procname); diff --git a/src/gallium/auxiliary/pipe-loader/Android.mk b/src/gallium/auxiliary/pipe-loader/Android.mk new file mode 100644 index 00000000000..27893137a1a --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/Android.mk @@ -0,0 +1,49 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2015 Emil Velikov <[email protected]> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the 
Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# NOTE: Currently we build only a 'static' pipe-loader +LOCAL_PATH := $(call my-dir) + +# get COMMON_SOURCES and DRM_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_CFLAGS := \ + -DHAVE_PIPE_LOADER_DRI \ + -DDROP_PIPE_LOADER_MISC \ + -DGALLIUM_STATIC_TARGETS + +LOCAL_SRC_FILES := $(COMMON_SOURCES) + +LOCAL_MODULE := libmesa_pipe_loader + +ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),) +LOCAL_CFLAGS += -DHAVE_LIBDRM +LOCAL_SRC_FILES += $(DRM_SOURCES) + +LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_STATIC_LIBRARIES := libmesa_loader +endif + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am index 8c837996539..8039a957b1b 100644 --- a/src/gallium/auxiliary/pipe-loader/Makefile.am +++ b/src/gallium/auxiliary/pipe-loader/Makefile.am @@ -9,20 +9,40 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) -noinst_LTLIBRARIES = libpipe_loader.la +noinst_LTLIBRARIES = \ + libpipe_loader_static.la \ + libpipe_loader_dynamic.la 
-libpipe_loader_la_SOURCES = \ +libpipe_loader_static_la_CFLAGS = \ + $(AM_CFLAGS) \ + -DGALLIUM_STATIC_TARGETS=1 + +libpipe_loader_dynamic_la_CFLAGS = \ + $(AM_CFLAGS) \ + -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" + +libpipe_loader_static_la_SOURCES = \ $(COMMON_SOURCES) -if HAVE_DRM_LOADER_GALLIUM +libpipe_loader_dynamic_la_SOURCES = \ + $(COMMON_SOURCES) + +if HAVE_LIBDRM AM_CFLAGS += \ $(LIBDRM_CFLAGS) -libpipe_loader_la_SOURCES += \ +libpipe_loader_static_la_SOURCES += \ $(DRM_SOURCES) -libpipe_loader_la_LIBADD = \ - $(top_builddir)/src/loader/libloader.la +libpipe_loader_dynamic_la_SOURCES += \ + $(DRM_SOURCES) endif +libpipe_loader_static_la_LIBADD = \ + $(top_builddir)/src/loader/libloader.la + +libpipe_loader_dynamic_la_LIBADD = \ + $(top_builddir)/src/loader/libloader.la + +EXTRA_DIST = SConscript diff --git a/src/gallium/auxiliary/pipe-loader/SConscript b/src/gallium/auxiliary/pipe-loader/SConscript new file mode 100644 index 00000000000..c611fb892f8 --- /dev/null +++ b/src/gallium/auxiliary/pipe-loader/SConscript @@ -0,0 +1,33 @@ +Import('*') + +env = env.Clone() + +env.MSVC2008Compat() + +env.Append(CPPPATH = [ + '#/src/loader', + '#/src/gallium/winsys', +]) + +env.Append(CPPDEFINES = [ + ('HAVE_PIPE_LOADER_DRI', '1'), + ('DROP_PIPE_LOADER_MISC', '1'), + ('GALLIUM_STATIC_TARGETS', '1'), +]) + +source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES') + +if env['HAVE_DRM']: + source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES') + + env.PkgUseModules('DRM') + env.Append(LIBS = [libloader]) + +pipe_loader = env.ConvenienceLibrary( + target = 'pipe_loader', + source = source, +) + +env.Alias('pipe_loader', pipe_loader) + +Export('pipe_loader') diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c index 8e79f853b0a..aef996c4617 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -32,10 +32,15 @@ #include 
"util/u_string.h" #include "util/u_dl.h" +#ifdef _MSC_VER +#include <stdlib.h> +#define PATH_MAX _MAX_PATH +#endif + #define MODULE_PREFIX "pipe_" static int (*backends[])(struct pipe_loader_device **, int) = { -#ifdef HAVE_PIPE_LOADER_DRM +#ifdef HAVE_LIBDRM &pipe_loader_drm_probe, #endif &pipe_loader_sw_probe @@ -69,10 +74,9 @@ pipe_loader_configuration(struct pipe_loader_device *dev, } struct pipe_screen * -pipe_loader_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_create_screen(struct pipe_loader_device *dev) { - return dev->ops->create_screen(dev, library_paths); + return dev->ops->create_screen(dev); } struct util_dl_library * diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h index 9b8712666bb..690d088ed82 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -82,13 +82,9 @@ pipe_loader_probe(struct pipe_loader_device **devs, int ndev); * Create a pipe_screen for the specified device. * * \param dev Device the screen will be created for. - * \param library_paths Colon-separated list of filesystem paths that - * will be used to look for the pipe driver - * module that handles this device. */ struct pipe_screen * -pipe_loader_create_screen(struct pipe_loader_device *dev, - const char *library_paths); +pipe_loader_create_screen(struct pipe_loader_device *dev); /** * Query the configuration parameters for the specified device. @@ -112,8 +108,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev, void pipe_loader_release(struct pipe_loader_device **devs, int ndev); -#ifdef HAVE_PIPE_LOADER_DRI - /** * Initialize sw dri device give the drisw_loader_funcs. * @@ -125,7 +119,15 @@ bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf); -#endif +/** + * Initialize a kms backed sw device given an fd. + * + * This function is platform-specific. 
+ * + * \sa pipe_loader_probe + */ +bool +pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd); /** * Initialize a null sw device. @@ -158,8 +160,6 @@ boolean pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, struct pipe_screen *screen); -#ifdef HAVE_PIPE_LOADER_DRM - /** * Get a list of known DRM devices. * @@ -180,8 +180,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev); bool pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd); -#endif - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c index 1799df7e4c5..994a284385c 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c @@ -36,6 +36,7 @@ #include <unistd.h> #include "loader.h" +#include "target-helpers/drm_helper_public.h" #include "state_tracker/drm_driver.h" #include "pipe_loader_priv.h" @@ -50,13 +51,123 @@ struct pipe_loader_drm_device { struct pipe_loader_device base; + const struct drm_driver_descriptor *dd; +#ifndef GALLIUM_STATIC_TARGETS struct util_dl_library *lib; +#endif int fd; }; #define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev) -static struct pipe_loader_ops pipe_loader_drm_ops; +static const struct pipe_loader_ops pipe_loader_drm_ops; + +#ifdef GALLIUM_STATIC_TARGETS +static const struct drm_conf_ret throttle_ret = { + DRM_CONF_INT, + {2}, +}; + +static const struct drm_conf_ret share_fd_ret = { + DRM_CONF_BOOL, + {true}, +}; + +static inline const struct drm_conf_ret * +configuration_query(enum drm_conf conf) +{ + switch (conf) { + case DRM_CONF_THROTTLE: + return &throttle_ret; + case DRM_CONF_SHARE_FD: + return &share_fd_ret; + default: + break; + } + return NULL; +} + +static const struct drm_driver_descriptor driver_descriptors[] = { + { + .name = "i915", + .driver_name = "i915", + .create_screen = pipe_i915_create_screen, + .configuration = 
configuration_query, + }, +#ifdef USE_VC4_SIMULATOR + /* VC4 simulator and ILO (i965) are mutually exclusive (error at + * configure). As the latter is unconditionally added, keep this one above + * it. + */ + { + .name = "i965", + .driver_name = "vc4", + .create_screen = pipe_vc4_create_screen, + .configuration = configuration_query, + }, +#endif + { + .name = "i965", + .driver_name = "i915", + .create_screen = pipe_ilo_create_screen, + .configuration = configuration_query, + }, + { + .name = "nouveau", + .driver_name = "nouveau", + .create_screen = pipe_nouveau_create_screen, + .configuration = configuration_query, + }, + { + .name = "r300", + .driver_name = "radeon", + .create_screen = pipe_r300_create_screen, + .configuration = configuration_query, + }, + { + .name = "r600", + .driver_name = "radeon", + .create_screen = pipe_r600_create_screen, + .configuration = configuration_query, + }, + { + .name = "radeonsi", + .driver_name = "radeon", + .create_screen = pipe_radeonsi_create_screen, + .configuration = configuration_query, + }, + { + .name = "vmwgfx", + .driver_name = "vmwgfx", + .create_screen = pipe_vmwgfx_create_screen, + .configuration = configuration_query, + }, + { + .name = "kgsl", + .driver_name = "freedreno", + .create_screen = pipe_freedreno_create_screen, + .configuration = configuration_query, + }, + { + .name = "msm", + .driver_name = "freedreno", + .create_screen = pipe_freedreno_create_screen, + .configuration = configuration_query, + }, + { + .name = "virtio_gpu", + .driver_name = "virtio-gpu", + .create_screen = pipe_virgl_create_screen, + .configuration = configuration_query, + }, + { + .name = "vc4", + .driver_name = "vc4", + .create_screen = pipe_vc4_create_screen, + .configuration = configuration_query, + }, +}; +#endif bool pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd) @@ -81,10 +192,36 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd) if (!ddev->base.driver_name) goto fail; +#ifdef 
GALLIUM_STATIC_TARGETS + for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) { + if (strcmp(driver_descriptors[i].name, ddev->base.driver_name) == 0) { + ddev->dd = &driver_descriptors[i]; + break; + } + } + if (!ddev->dd) + goto fail; +#else + ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR); + if (!ddev->lib) + goto fail; + + ddev->dd = (const struct drm_driver_descriptor *) + util_dl_get_proc_address(ddev->lib, "driver_descriptor"); + + /* sanity check on the name */ + if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0) + goto fail; +#endif + *dev = &ddev->base; return true; fail: +#ifndef GALLIUM_STATIC_TARGETS + if (ddev->lib) + util_dl_close(ddev->lib); +#endif FREE(ddev); return false; } @@ -105,8 +242,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev) for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0; i <= DRM_RENDER_NODE_MAX_MINOR; i++) { - fd = open_drm_render_node_minor(i); struct pipe_loader_device *dev; + + fd = open_drm_render_node_minor(i); if (fd < 0) continue; @@ -132,8 +270,10 @@ pipe_loader_drm_release(struct pipe_loader_device **dev) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev); +#ifndef GALLIUM_STATIC_TARGETS if (ddev->lib) util_dl_close(ddev->lib); +#endif close(ddev->fd); FREE(ddev->base.driver_name); @@ -146,47 +286,22 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev, enum drm_conf conf) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev); - const struct drm_driver_descriptor *dd; - - if (!ddev->lib) - return NULL; - - dd = (const struct drm_driver_descriptor *) - util_dl_get_proc_address(ddev->lib, "driver_descriptor"); - /* sanity check on the name */ - if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0) + if (!ddev->dd->configuration) return NULL; - if (!dd->configuration) - return NULL; - - return dd->configuration(conf); + return ddev->dd->configuration(conf); } static struct pipe_screen * 
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_drm_create_screen(struct pipe_loader_device *dev) { struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev); - const struct drm_driver_descriptor *dd; - - if (!ddev->lib) - ddev->lib = pipe_loader_find_module(dev, library_paths); - if (!ddev->lib) - return NULL; - - dd = (const struct drm_driver_descriptor *) - util_dl_get_proc_address(ddev->lib, "driver_descriptor"); - - /* sanity check on the name */ - if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0) - return NULL; - return dd->create_screen(ddev->fd); + return ddev->dd->create_screen(ddev->fd); } -static struct pipe_loader_ops pipe_loader_drm_ops = { +static const struct pipe_loader_ops pipe_loader_drm_ops = { .create_screen = pipe_loader_drm_create_screen, .configuration = pipe_loader_drm_configuration, .release = pipe_loader_drm_release diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h index d3b025221c5..da2ca8c6e1f 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h @@ -31,8 +31,7 @@ #include "pipe_loader.h" struct pipe_loader_ops { - struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev, - const char *library_paths); + struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev); const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev, enum drm_conf conf); diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index 6794930193d..5539a730b4c 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -30,45 +30,160 @@ #include "util/u_memory.h" #include "util/u_dl.h" #include "sw/dri/dri_sw_winsys.h" +#include "sw/kms-dri/kms_dri_sw_winsys.h" #include "sw/null/null_sw_winsys.h" 
#include "sw/wrapper/wrapper_sw_winsys.h" #include "target-helpers/inline_sw_helper.h" #include "state_tracker/drisw_api.h" +#include "state_tracker/sw_driver.h" struct pipe_loader_sw_device { struct pipe_loader_device base; + const struct sw_driver_descriptor *dd; +#ifndef GALLIUM_STATIC_TARGETS struct util_dl_library *lib; +#endif struct sw_winsys *ws; }; #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev) -static struct pipe_loader_ops pipe_loader_sw_ops; +static const struct pipe_loader_ops pipe_loader_sw_ops; -static struct sw_winsys *(*backends[])() = { - null_sw_create +#ifdef GALLIUM_STATIC_TARGETS +static const struct sw_driver_descriptor driver_descriptors = { + .create_screen = sw_screen_create, + .winsys = { +#ifdef HAVE_PIPE_LOADER_DRI + { + .name = "dri", + .create_winsys = dri_create_sw_winsys, + }, +#endif +#ifdef HAVE_PIPE_LOADER_KMS + { + .name = "kms_dri", + .create_winsys = kms_dri_create_winsys, + }, +#endif +/** + * XXX: Do not include these two for non autotools builds. + * They don't have neither opencl nor nine, where these are used. 
+ */ +#ifndef DROP_PIPE_LOADER_MISC + { + .name = "null", + .create_winsys = null_sw_create, + }, + { + .name = "wrapped", + .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + }, +#endif + { 0 }, + } }; +#endif + +static bool +pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev) +{ + sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; + sdev->base.driver_name = "swrast"; + sdev->base.ops = &pipe_loader_sw_ops; + +#ifdef GALLIUM_STATIC_TARGETS + sdev->dd = &driver_descriptors; + if (!sdev->dd) + return false; +#else + sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR); + if (!sdev->lib) + return false; + + sdev->dd = (const struct sw_driver_descriptor *) + util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor"); + + if (!sdev->dd){ + util_dl_close(sdev->lib); + sdev->lib = NULL; + return false; + } +#endif + + return true; +} + +static void +pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev) +{ +#ifndef GALLIUM_STATIC_TARGETS + if (sdev->lib) + util_dl_close(sdev->lib); +#endif +} #ifdef HAVE_PIPE_LOADER_DRI bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = dri_create_sw_winsys(drisw_lf); - if (!sdev->ws) { - FREE(sdev); - return false; + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + break; + } } + if (!sdev->ws) + goto fail; + *devs = &sdev->base; + return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; +} +#endif + +#ifdef HAVE_PIPE_LOADER_KMS +bool +pipe_loader_sw_probe_kms(struct pipe_loader_device 
**devs, int fd) +{ + struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; + if (!sdev) + return false; + + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(fd); + break; + } + } + if (!sdev->ws) + goto fail; + + *devs = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } #endif @@ -76,38 +191,40 @@ bool pipe_loader_sw_probe_null(struct pipe_loader_device **devs) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = null_sw_create(); - if (!sdev->ws) { - FREE(sdev); - return false; + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "null") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(); + break; + } } - *devs = &sdev->base; + if (!sdev->ws) + goto fail; + *devs = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } int pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev) { - int i; - - for (i = 0; i < Elements(backends); i++) { - if (i < ndev) { - struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); - /* TODO: handle CALLOC_STRUCT failure */ + int i = 1; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = backends[i](); - devs[i] = &sdev->base; + if (i < ndev) { + if (!pipe_loader_sw_probe_null(devs)) { + i--; } } @@ -119,21 +236,30 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, struct pipe_screen *screen) { struct pipe_loader_sw_device 
*sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; if (!sdev) return false; - sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; - sdev->base.driver_name = "swrast"; - sdev->base.ops = &pipe_loader_sw_ops; - sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen); + if (!pipe_loader_sw_probe_init_common(sdev)) + goto fail; - if (!sdev->ws) { - FREE(sdev); - return false; + for (i = 0; sdev->dd->winsys; i++) { + if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(screen); + break; + } } + if (!sdev->ws) + goto fail; + *dev = &sdev->base; return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; } static void @@ -141,8 +267,10 @@ pipe_loader_sw_release(struct pipe_loader_device **dev) { struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev); +#ifndef GALLIUM_STATIC_TARGETS if (sdev->lib) util_dl_close(sdev->lib); +#endif FREE(sdev); *dev = NULL; @@ -156,28 +284,19 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev, } static struct pipe_screen * -pipe_loader_sw_create_screen(struct pipe_loader_device *dev, - const char *library_paths) +pipe_loader_sw_create_screen(struct pipe_loader_device *dev) { struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev); - struct pipe_screen *(*init)(struct sw_winsys *); + struct pipe_screen *screen; - if (!sdev->lib) - sdev->lib = pipe_loader_find_module(dev, library_paths); - if (!sdev->lib) - return NULL; - - init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen"); - if (!init){ - util_dl_close(sdev->lib); - sdev->lib = NULL; - return NULL; - } + screen = sdev->dd->create_screen(sdev->ws); + if (!screen) + sdev->ws->destroy(sdev->ws); - return init(sdev->ws); + return screen; } -static struct pipe_loader_ops pipe_loader_sw_ops = { +static const struct pipe_loader_ops pipe_loader_sw_ops = { .create_screen = pipe_loader_sw_create_screen, .configuration = pipe_loader_sw_configuration, .release = 
pipe_loader_sw_release diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h new file mode 100644 index 00000000000..332b1cba984 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -0,0 +1,275 @@ +#ifndef DRM_HELPER_H +#define DRM_HELPER_H + +#include <stdio.h> +#include "target-helpers/inline_debug_helper.h" +#include "target-helpers/drm_helper_public.h" + +#ifdef GALLIUM_I915 +#include "i915/drm/i915_drm_public.h" +#include "i915/i915_public.h" + +struct pipe_screen * +pipe_i915_create_screen(int fd) +{ + struct i915_winsys *iws; + struct pipe_screen *screen; + + iws = i915_drm_winsys_create(fd); + if (!iws) + return NULL; + + screen = i915_screen_create(iws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_i915_create_screen(int fd) +{ + fprintf(stderr, "i915g: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_ILO +#include "intel/drm/intel_drm_public.h" +#include "ilo/ilo_public.h" + +struct pipe_screen * +pipe_ilo_create_screen(int fd) +{ + struct intel_winsys *iws; + struct pipe_screen *screen; + + iws = intel_winsys_create_for_fd(fd); + if (!iws) + return NULL; + + screen = ilo_screen_create(iws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_ilo_create_screen(int fd) +{ + fprintf(stderr, "ilo: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_NOUVEAU +#include "nouveau/drm/nouveau_drm_public.h" + +struct pipe_screen * +pipe_nouveau_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = nouveau_drm_screen_create(fd); + return screen ? 
debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_nouveau_create_screen(int fd) +{ + fprintf(stderr, "nouveau: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_R300 +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r300/r300_public.h" + +struct pipe_screen * +pipe_r300_create_screen(int fd) +{ + struct radeon_winsys *rw; + + rw = radeon_drm_winsys_create(fd, r300_screen_create); + return rw ? debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_r300_create_screen(int fd) +{ + fprintf(stderr, "r300: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_R600 +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "r600/r600_public.h" + +struct pipe_screen * +pipe_r600_create_screen(int fd) +{ + struct radeon_winsys *rw; + + rw = radeon_drm_winsys_create(fd, r600_screen_create); + return rw ? debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_r600_create_screen(int fd) +{ + fprintf(stderr, "r600: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_RADEONSI +#include "radeon/radeon_winsys.h" +#include "radeon/drm/radeon_drm_public.h" +#include "amdgpu/drm/amdgpu_public.h" +#include "radeonsi/si_public.h" + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd) +{ + struct radeon_winsys *rw; + + /* First, try amdgpu. */ + rw = amdgpu_winsys_create(fd, radeonsi_screen_create); + + if (!rw) + rw = radeon_drm_winsys_create(fd, radeonsi_screen_create); + + return rw ? 
debug_screen_wrap(rw->screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd) +{ + fprintf(stderr, "radeonsi: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VMWGFX +#include "svga/drm/svga_drm_public.h" +#include "svga/svga_public.h" + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd) +{ + struct svga_winsys_screen *sws; + struct pipe_screen *screen; + + sws = svga_drm_winsys_screen_create(fd); + if (!sws) + return NULL; + + screen = svga_screen_create(sws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd) +{ + fprintf(stderr, "svga: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_FREEDRENO +#include "freedreno/drm/freedreno_drm_public.h" + +struct pipe_screen * +pipe_freedreno_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = fd_drm_screen_create(fd); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_freedreno_create_screen(int fd) +{ + fprintf(stderr, "freedreno: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VIRGL +#include "virgl/drm/virgl_drm_public.h" +#include "virgl/virgl_public.h" + +struct pipe_screen * +pipe_virgl_create_screen(int fd) +{ + struct virgl_winsys *vws; + struct pipe_screen *screen; + + vws = virgl_drm_winsys_create(fd); + if (!vws) + return NULL; + + screen = virgl_create_screen(vws); + return screen ? debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_virgl_create_screen(int fd) +{ + fprintf(stderr, "virgl: driver missing\n"); + return NULL; +} + +#endif + +#ifdef GALLIUM_VC4 +#include "vc4/drm/vc4_drm_public.h" + +struct pipe_screen * +pipe_vc4_create_screen(int fd) +{ + struct pipe_screen *screen; + + screen = vc4_drm_screen_create(fd); + return screen ? 
debug_screen_wrap(screen) : NULL; +} + +#else + +struct pipe_screen * +pipe_vc4_create_screen(int fd) +{ + fprintf(stderr, "vc4: driver missing\n"); + return NULL; +} + +#endif + + +#endif /* DRM_HELPER_H */ diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h new file mode 100644 index 00000000000..d1f9382a6f9 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h @@ -0,0 +1,37 @@ +#ifndef _DRM_HELPER_PUBLIC_H +#define _DRM_HELPER_PUBLIC_H + + +struct pipe_screen; + +struct pipe_screen * +pipe_i915_create_screen(int fd); + +struct pipe_screen * +pipe_ilo_create_screen(int fd); + +struct pipe_screen * +pipe_nouveau_create_screen(int fd); + +struct pipe_screen * +pipe_r300_create_screen(int fd); + +struct pipe_screen * +pipe_r600_create_screen(int fd); + +struct pipe_screen * +pipe_radeonsi_create_screen(int fd); + +struct pipe_screen * +pipe_vmwgfx_create_screen(int fd); + +struct pipe_screen * +pipe_freedreno_create_screen(int fd); + +struct pipe_screen * +pipe_virgl_create_screen(int fd); + +struct pipe_screen * +pipe_vc4_create_screen(int fd); + +#endif /* _DRM_HELPER_PUBLIC_H */ diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h deleted file mode 100644 index 6ca4dc8136c..00000000000 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++ /dev/null @@ -1,531 +0,0 @@ -#ifndef INLINE_DRM_HELPER_H -#define INLINE_DRM_HELPER_H - -#include "state_tracker/drm_driver.h" -#include "target-helpers/inline_debug_helper.h" -#include "loader.h" -#if defined(DRI_TARGET) -#include "dri_screen.h" -#endif - -#if GALLIUM_SOFTPIPE -#include "target-helpers/inline_sw_helper.h" -#include "sw/kms-dri/kms_dri_sw_winsys.h" -#endif - -#if GALLIUM_I915 -#include "i915/drm/i915_drm_public.h" -#include "i915/i915_public.h" -#endif - -#if GALLIUM_ILO -#include "intel/drm/intel_drm_public.h" -#include 
"ilo/ilo_public.h" -#endif - -#if GALLIUM_NOUVEAU -#include "nouveau/drm/nouveau_drm_public.h" -#endif - -#if GALLIUM_R300 -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "r300/r300_public.h" -#endif - -#if GALLIUM_R600 -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "r600/r600_public.h" -#endif - -#if GALLIUM_RADEONSI -#include "radeon/radeon_winsys.h" -#include "radeon/drm/radeon_drm_public.h" -#include "amdgpu/drm/amdgpu_public.h" -#include "radeonsi/si_public.h" -#endif - -#if GALLIUM_VMWGFX -#include "svga/drm/svga_drm_public.h" -#include "svga/svga_public.h" -#endif - -#if GALLIUM_FREEDRENO -#include "freedreno/drm/freedreno_drm_public.h" -#endif - -#if GALLIUM_VC4 -#include "vc4/drm/vc4_drm_public.h" -#endif - -#if GALLIUM_VIRGL -#include "virgl/drm/virgl_drm_public.h" -#include "virgl/virgl_public.h" -#endif - -static char* driver_name = NULL; - -/* XXX: We need to teardown the winsys if *screen_create() fails. */ - -#if defined(GALLIUM_SOFTPIPE) -#if defined(DRI_TARGET) -#if defined(HAVE_LIBDRM) - -const __DRIextension **__driDriverGetExtensions_kms_swrast(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void) -{ - globalDriverAPI = &dri_kms_driver_api; - return galliumdrm_driver_extensions; -} - -struct pipe_screen * -kms_swrast_create_screen(int fd) -{ - struct sw_winsys *sws; - struct pipe_screen *screen; - - sws = kms_dri_create_winsys(fd); - if (!sws) - return NULL; - - screen = sw_screen_create(sws); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif -#endif -#endif - -#if defined(GALLIUM_I915) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_i915(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_i915_create_screen(int fd) -{ - struct i915_winsys *iws; - struct pipe_screen *screen; - - iws = i915_drm_winsys_create(fd); - if (!iws) - return NULL; - - screen = i915_screen_create(iws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_ILO) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_i965(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_ilo_create_screen(int fd) -{ - struct intel_winsys *iws; - struct pipe_screen *screen; - - iws = intel_winsys_create_for_fd(fd); - if (!iws) - return NULL; - - screen = ilo_screen_create(iws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_NOUVEAU) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_nouveau(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_nouveau_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = nouveau_drm_screen_create(fd); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_R300) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_r300(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_r300_create_screen(int fd) -{ - struct radeon_winsys *rw; - - rw = radeon_drm_winsys_create(fd, r300_screen_create); - return rw ? debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_R600) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_r600(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_r600_create_screen(int fd) -{ - struct radeon_winsys *rw; - - rw = radeon_drm_winsys_create(fd, r600_screen_create); - return rw ? debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_RADEONSI) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_radeonsi(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_radeonsi_create_screen(int fd) -{ - struct radeon_winsys *rw; - - /* First, try amdgpu. */ - rw = amdgpu_winsys_create(fd, radeonsi_screen_create); - - if (!rw) - rw = radeon_drm_winsys_create(fd, radeonsi_screen_create); - - return rw ? 
debug_screen_wrap(rw->screen) : NULL; -} -#endif - -#if defined(GALLIUM_VMWGFX) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_vmwgfx(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_vmwgfx_create_screen(int fd) -{ - struct svga_winsys_screen *sws; - struct pipe_screen *screen; - - sws = svga_drm_winsys_screen_create(fd); - if (!sws) - return NULL; - - screen = svga_screen_create(sws); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_FREEDRENO) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_msm(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} - -const __DRIextension **__driDriverGetExtensions_kgsl(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_freedreno_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = fd_drm_screen_create(fd); - return screen ? debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_VIRGL) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_virtio_gpu(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -static struct pipe_screen * -pipe_virgl_create_screen(int fd) -{ - struct virgl_winsys *vws; - struct pipe_screen *screen; - - vws = virgl_drm_winsys_create(fd); - if (!vws) - return NULL; - - screen = virgl_create_screen(vws); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -#if defined(GALLIUM_VC4) -#if defined(DRI_TARGET) - -const __DRIextension **__driDriverGetExtensions_vc4(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} - -#if defined(USE_VC4_SIMULATOR) -const __DRIextension **__driDriverGetExtensions_i965(void); - -/** - * When building using the simulator (on x86), we advertise ourselves as the - * i965 driver so that you can just make a directory with a link from - * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that - * on your i965-using host to run the driver under simulation. - * - * This is, of course, incompatible with building with the ilo driver, but you - * shouldn't be building that anyway. - */ -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &galliumdrm_driver_api; - return galliumdrm_driver_extensions; -} -#endif - -#endif - -static struct pipe_screen * -pipe_vc4_create_screen(int fd) -{ - struct pipe_screen *screen; - - screen = vc4_drm_screen_create(fd); - return screen ? 
debug_screen_wrap(screen) : NULL; -} -#endif - -inline struct pipe_screen * -dd_create_screen(int fd) -{ - driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM); - if (!driver_name) - return NULL; - -#if defined(GALLIUM_I915) - if (strcmp(driver_name, "i915") == 0) - return pipe_i915_create_screen(fd); - else -#endif -#if defined(GALLIUM_ILO) - if (strcmp(driver_name, "i965") == 0) - return pipe_ilo_create_screen(fd); - else -#endif -#if defined(GALLIUM_NOUVEAU) - if (strcmp(driver_name, "nouveau") == 0) - return pipe_nouveau_create_screen(fd); - else -#endif -#if defined(GALLIUM_R300) - if (strcmp(driver_name, "r300") == 0) - return pipe_r300_create_screen(fd); - else -#endif -#if defined(GALLIUM_R600) - if (strcmp(driver_name, "r600") == 0) - return pipe_r600_create_screen(fd); - else -#endif -#if defined(GALLIUM_RADEONSI) - if (strcmp(driver_name, "radeonsi") == 0) - return pipe_radeonsi_create_screen(fd); - else -#endif -#if defined(GALLIUM_VMWGFX) - if (strcmp(driver_name, "vmwgfx") == 0) - return pipe_vmwgfx_create_screen(fd); - else -#endif -#if defined(GALLIUM_FREEDRENO) - if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0)) - return pipe_freedreno_create_screen(fd); - else -#endif -#if defined(GALLIUM_VIRGL) - if ((strcmp(driver_name, "virtio_gpu") == 0)) - return pipe_virgl_create_screen(fd); - else -#endif -#if defined(GALLIUM_VC4) - if (strcmp(driver_name, "vc4") == 0) - return pipe_vc4_create_screen(fd); - else -#if defined(USE_VC4_SIMULATOR) - if (strcmp(driver_name, "i965") == 0) - return pipe_vc4_create_screen(fd); - else -#endif -#endif - return NULL; -} - -inline const char * -dd_driver_name(void) -{ - return driver_name; -} - -static const struct drm_conf_ret throttle_ret = { - DRM_CONF_INT, - {2}, -}; - -static const struct drm_conf_ret share_fd_ret = { - DRM_CONF_BOOL, - {true}, -}; - -static inline const struct drm_conf_ret * -configuration_query(enum drm_conf conf) -{ - switch (conf) { - case DRM_CONF_THROTTLE: - 
return &throttle_ret; - case DRM_CONF_SHARE_FD: - return &share_fd_ret; - default: - break; - } - return NULL; -} - -inline const struct drm_conf_ret * -dd_configuration(enum drm_conf conf) -{ - if (!driver_name) - return NULL; - -#if defined(GALLIUM_I915) - if (strcmp(driver_name, "i915") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_ILO) - if (strcmp(driver_name, "i965") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_NOUVEAU) - if (strcmp(driver_name, "nouveau") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_R300) - if (strcmp(driver_name, "r300") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_R600) - if (strcmp(driver_name, "r600") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_RADEONSI) - if (strcmp(driver_name, "radeonsi") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VMWGFX) - if (strcmp(driver_name, "vmwgfx") == 0) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_FREEDRENO) - if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0)) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VIRGL) - if ((strcmp(driver_name, "virtio_gpu") == 0)) - return configuration_query(conf); - else -#endif -#if defined(GALLIUM_VC4) - if (strcmp(driver_name, "vc4") == 0) - return configuration_query(conf); - else -#if defined(USE_VC4_SIMULATOR) - if (strcmp(driver_name, "i965") == 0) - return configuration_query(conf); - else -#endif -#endif - return NULL; -} -#endif /* INLINE_DRM_HELPER_H */ diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index f3693fb1f39..a9ab16f2b54 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -69,69 +69,4 @@ sw_screen_create(struct sw_winsys 
*winsys) return sw_screen_create_named(winsys, driver); } -#if defined(GALLIUM_SOFTPIPE) -#if defined(DRI_TARGET) -#include "target-helpers/inline_debug_helper.h" -#include "sw/dri/dri_sw_winsys.h" -#include "dri_screen.h" - -const __DRIextension **__driDriverGetExtensions_swrast(void); - -PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void) -{ - globalDriverAPI = &galliumsw_driver_api; - return galliumsw_driver_extensions; -} - -inline struct pipe_screen * -drisw_create_screen(struct drisw_loader_funcs *lf) -{ - struct sw_winsys *winsys = NULL; - struct pipe_screen *screen = NULL; - - winsys = dri_create_sw_winsys(lf); - if (winsys == NULL) - return NULL; - - screen = sw_screen_create(winsys); - if (screen == NULL) { - winsys->destroy(winsys); - return NULL; - } - - screen = debug_screen_wrap(screen); - return screen; -} -#endif // DRI_TARGET - -#if defined(NINE_TARGET) -#include "sw/wrapper/wrapper_sw_winsys.h" -#include "target-helpers/inline_debug_helper.h" - -extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen); - -inline struct pipe_screen * -ninesw_create_screen(struct pipe_screen *pscreen) -{ - struct sw_winsys *winsys = NULL; - struct pipe_screen *screen = NULL; - - winsys = wrapper_sw_winsys_wrap_pipe_screen(pscreen); - if (winsys == NULL) - return NULL; - - screen = sw_screen_create(winsys); - if (screen == NULL) { - winsys->destroy(winsys); - return NULL; - } - - screen = debug_screen_wrap(screen); - return screen; -} -#endif // NINE_TARGET - -#endif // GALLIUM_SOFTPIPE - - #endif diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index aca435d6cad..9b97d8dc4b9 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -45,7 +45,7 @@ struct util_dl_library * util_dl_open(const char *filename) { #if defined(PIPE_OS_UNIX) - return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL); + return (struct util_dl_library *)dlopen(filename, 
RTLD_LAZY | RTLD_LOCAL); #elif defined(PIPE_OS_WINDOWS) return (struct util_dl_library *)LoadLibraryA(filename); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index d3b77e6b99b..c26d7331d4c 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -202,6 +202,36 @@ PIPE_FORMAT_BPTC_SRGBA , bptc, 4, 4, x128, , , , xyzw, sr PIPE_FORMAT_BPTC_RGB_FLOAT , bptc, 4, 4, x128, , , , xyz1, rgb PIPE_FORMAT_BPTC_RGB_UFLOAT , bptc, 4, 4, x128, , , , xyz1, rgb +PIPE_FORMAT_ASTC_4x4 , astc, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_5x4 , astc, 5, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_5x5 , astc, 5, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_6x5 , astc, 6, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_6x6 , astc, 6, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x5 , astc, 8, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x6 , astc, 8, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_8x8 , astc, 8, 8, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x5 , astc,10, 5, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x6 , astc,10, 6, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x8 , astc,10, 8, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_10x10 , astc,10,10, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_12x10 , astc,12,10, x128, , , , xyzw, rgb +PIPE_FORMAT_ASTC_12x12 , astc,12,12, x128, , , , xyzw, rgb + +PIPE_FORMAT_ASTC_4x4_SRGB , astc, 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_5x4_SRGB , astc, 5, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_5x5_SRGB , astc, 5, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_6x5_SRGB , astc, 6, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_6x6_SRGB , astc, 6, 6, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x5_SRGB , astc, 8, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x6_SRGB , astc, 8, 6, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_8x8_SRGB , astc, 8, 8, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x5_SRGB , astc,10, 5, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x6_SRGB , astc,10, 6, x128, , 
, , xyzw, srgb +PIPE_FORMAT_ASTC_10x8_SRGB , astc,10, 8, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_10x10_SRGB , astc,10,10, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_12x10_SRGB , astc,12,10, x128, , , , xyzw, srgb +PIPE_FORMAT_ASTC_12x12_SRGB , astc,12,12, x128, , , , xyzw, srgb + # Straightforward D3D10-like formats (also used for # vertex buffer element description) # diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index a1b1b28fa41..ffdb864fa83 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -84,9 +84,14 @@ enum util_format_layout { UTIL_FORMAT_LAYOUT_BPTC = 7, /** + * ASTC + */ + UTIL_FORMAT_LAYOUT_ASTC = 8, + + /** * Everything else that doesn't fit in any of the above layouts. */ - UTIL_FORMAT_LAYOUT_OTHER = 8 + UTIL_FORMAT_LAYOUT_OTHER = 9 }; @@ -481,6 +486,7 @@ util_format_is_compressed(enum pipe_format format) case UTIL_FORMAT_LAYOUT_RGTC: case UTIL_FORMAT_LAYOUT_ETC: case UTIL_FORMAT_LAYOUT_BPTC: + case UTIL_FORMAT_LAYOUT_ASTC: /* XXX add other formats in the future */ return TRUE; default: @@ -924,6 +930,35 @@ util_format_srgb(enum pipe_format format) return PIPE_FORMAT_B5G6R5_SRGB; case PIPE_FORMAT_BPTC_RGBA_UNORM: return PIPE_FORMAT_BPTC_SRGBA; + case PIPE_FORMAT_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4_SRGB; + case PIPE_FORMAT_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4_SRGB; + case PIPE_FORMAT_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5_SRGB; + case PIPE_FORMAT_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5_SRGB; + case PIPE_FORMAT_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6_SRGB; + case PIPE_FORMAT_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5_SRGB; + case PIPE_FORMAT_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6_SRGB; + case PIPE_FORMAT_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8_SRGB; + case PIPE_FORMAT_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5_SRGB; + case PIPE_FORMAT_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6_SRGB; + case PIPE_FORMAT_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8_SRGB; + case 
PIPE_FORMAT_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10_SRGB; + case PIPE_FORMAT_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10_SRGB; + case PIPE_FORMAT_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12_SRGB; + default: return PIPE_FORMAT_NONE; } @@ -971,6 +1006,34 @@ util_format_linear(enum pipe_format format) return PIPE_FORMAT_B5G6R5_UNORM; case PIPE_FORMAT_BPTC_SRGBA: return PIPE_FORMAT_BPTC_RGBA_UNORM; + case PIPE_FORMAT_ASTC_4x4_SRGB: + return PIPE_FORMAT_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4_SRGB: + return PIPE_FORMAT_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5_SRGB: + return PIPE_FORMAT_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5_SRGB: + return PIPE_FORMAT_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6_SRGB: + return PIPE_FORMAT_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5_SRGB: + return PIPE_FORMAT_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6_SRGB: + return PIPE_FORMAT_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8_SRGB: + return PIPE_FORMAT_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5_SRGB: + return PIPE_FORMAT_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6_SRGB: + return PIPE_FORMAT_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8_SRGB: + return PIPE_FORMAT_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10_SRGB: + return PIPE_FORMAT_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10_SRGB: + return PIPE_FORMAT_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12_SRGB: + return PIPE_FORMAT_ASTC_12x12; default: return format; } diff --git a/src/gallium/auxiliary/util/u_format_fake.c b/src/gallium/auxiliary/util/u_format_fake.c deleted file mode 100644 index 77e896d27bd..00000000000 --- a/src/gallium/auxiliary/util/u_format_fake.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "u_format.h" -#include "u_format_fake.h" - -#define fake(format) \ -void \ -util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} \ -\ -void \ -util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ 
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \ -\ -void \ -util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} - -fake(bptc_rgba_unorm) -fake(bptc_srgba) -fake(bptc_rgb_float) -fake(bptc_rgb_ufloat) - -fake(etc2_rgb8) -fake(etc2_srgb8) -fake(etc2_rgb8a1) -fake(etc2_srgb8a1) -fake(etc2_rgba8) -fake(etc2_srgba8) -fake(etc2_r11_unorm) -fake(etc2_r11_snorm) -fake(etc2_rg11_unorm) -fake(etc2_rg11_snorm) diff --git a/src/gallium/auxiliary/util/u_format_fake.h b/src/gallium/auxiliary/util/u_format_fake.h deleted file mode 100644 index e6bfd4e1594..00000000000 --- a/src/gallium/auxiliary/util/u_format_fake.h +++ /dev/null @@ -1,66 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 Red Hat Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - **************************************************************************/ - -#ifndef U_FORMAT_FAKE_H_ -#define U_FORMAT_FAKE_H_ - -#define __format_fake(format) \ -void \ -util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); \ -\ -void \ -util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); \ -\ -void \ -util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); - -__format_fake(bptc_rgba_unorm) -__format_fake(bptc_srgba) -__format_fake(bptc_rgb_float) -__format_fake(bptc_rgb_ufloat) - -__format_fake(etc2_rgb8) -__format_fake(etc2_srgb8) -__format_fake(etc2_rgb8a1) -__format_fake(etc2_srgb8a1) -__format_fake(etc2_rgba8) -__format_fake(etc2_srgba8) -__format_fake(etc2_r11_unorm) -__format_fake(etc2_r11_snorm) -__format_fake(etc2_rg11_unorm) -__format_fake(etc2_rg11_snorm) - -#endif diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index fb42de723c4..d4bb1de4cb5 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -686,7 +686,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): def is_format_hand_written(format): - return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'subsampled', 'other') or format.colorspace == ZS + return 
format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'astc', 'subsampled', 'other') or format.colorspace == ZS def generate(formats): diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index aceb0caf7e1..879d10ff01d 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -90,7 +90,6 @@ def write_format_table(formats): print '#include "u_format_rgtc.h"' print '#include "u_format_latc.h"' print '#include "u_format_etc.h"' - print '#include "u_format_fake.h"' print u_format_pack.generate(formats) @@ -139,10 +138,15 @@ def write_format_table(formats): u_format_pack.print_channels(format, do_channel_array) u_format_pack.print_channels(format, do_swizzle_array) print " %s," % (colorspace_map(format.colorspace),) - if format.colorspace != ZS and not format.is_pure_color(): + access = True + if format.layout in ('bptc', 'astc'): + access = False + if format.layout == 'etc' and format.short_name() != 'etc1_rgb8': + access = False + if format.colorspace != ZS and not format.is_pure_color() and access: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() - if format.layout == 's3tc' or format.layout == 'rgtc' or format.layout == 'bptc': + if format.layout == 's3tc' or format.layout == 'rgtc': print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() else: print " NULL, /* fetch_rgba_8unorm */" diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h index df01917466f..1af7653d650 100644 --- a/src/gallium/auxiliary/vl/vl_winsys.h +++ b/src/gallium/auxiliary/vl/vl_winsys.h @@ -42,34 +42,31 @@ struct pipe_loader_device; struct vl_screen { - struct pipe_screen *pscreen; - struct pipe_loader_device *dev; -}; + void (*destroy)(struct vl_screen *vscreen); -struct vl_screen* -vl_screen_create(Display *display, int screen); + struct pipe_resource * + 
(*texture_from_drawable)(struct vl_screen *vscreen, void *drawable); -void vl_screen_destroy(struct vl_screen *vscreen); + struct u_rect * + (*get_dirty_area)(struct vl_screen *vscreen); -struct pipe_resource* -vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable); + uint64_t + (*get_timestamp)(struct vl_screen *vscreen, void *drawable); -struct u_rect * -vl_screen_get_dirty_area(struct vl_screen *vscreen); + void + (*set_next_timestamp)(struct vl_screen *vscreen, uint64_t stamp); -uint64_t -vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable); + void * + (*get_private)(struct vl_screen *vscreen); -void -vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp); + struct pipe_screen *pscreen; + struct pipe_loader_device *dev; +}; -void* -vl_screen_get_private(struct vl_screen *vscreen); +struct vl_screen * +vl_dri2_screen_create(Display *display, int screen); -struct vl_screen* +struct vl_screen * vl_drm_screen_create(int fd); -void -vl_drm_screen_destroy(struct vl_screen *vscreen); - #endif diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c index 3b1b87f9523..ae0d4cdee1b 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c @@ -73,24 +73,27 @@ struct vl_dri_screen int64_t last_ust, ns_frame, last_msc, next_msc; }; -static const unsigned int attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT }; +static const unsigned attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT }; + +static void vl_dri2_screen_destroy(struct vl_screen *vscreen); static void -vl_dri2_handle_stamps(struct vl_dri_screen* scrn, +vl_dri2_handle_stamps(struct vl_dri_screen *scrn, uint32_t ust_hi, uint32_t ust_lo, uint32_t msc_hi, uint32_t msc_lo) { int64_t ust = ((((uint64_t)ust_hi) << 32) | ust_lo) * 1000; int64_t msc = (((uint64_t)msc_hi) << 32) | msc_lo; - if (scrn->last_ust && scrn->last_msc && (ust > scrn->last_ust) && (msc > 
scrn->last_msc)) + if (scrn->last_ust && (ust > scrn->last_ust) && + scrn->last_msc && (msc > scrn->last_msc)) scrn->ns_frame = (ust - scrn->last_ust) / (msc - scrn->last_msc); scrn->last_ust = ust; scrn->last_msc = msc; } -static xcb_dri2_get_buffers_reply_t* +static xcb_dri2_get_buffers_reply_t * vl_dri2_get_flush_reply(struct vl_dri_screen *scrn) { xcb_dri2_wait_sbc_reply_t *wait_sbc_reply; @@ -120,7 +123,7 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen, unsigned level, unsigned layer, void *context_private, struct pipe_box *sub_box) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)context_private; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)context_private; uint32_t msc_hi, msc_lo; assert(screen); @@ -132,9 +135,11 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen, msc_hi = scrn->next_msc >> 32; msc_lo = scrn->next_msc & 0xFFFFFFFF; - scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, msc_hi, msc_lo, 0, 0, 0, 0); + scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, + msc_hi, msc_lo, 0, 0, 0, 0); scrn->wait_cookie = xcb_dri2_wait_sbc_unchecked(scrn->conn, scrn->drawable, 0, 0); - scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, 1, 1, attachments); + scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, + 1, 1, attachments); scrn->flushed = true; scrn->current_buffer = !scrn->current_buffer; @@ -170,10 +175,10 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable) scrn->drawable = drawable; } -struct pipe_resource* -vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) +static struct pipe_resource * +vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; struct winsys_handle dri2_handle; struct pipe_resource template, *tex; 
@@ -185,11 +190,12 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) assert(scrn); - vl_dri2_set_drawable(scrn, drawable); + vl_dri2_set_drawable(scrn, (Drawable)drawable); reply = vl_dri2_get_flush_reply(scrn); if (!reply) { xcb_dri2_get_buffers_cookie_t cookie; - cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, drawable, 1, 1, attachments); + cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, (Drawable)drawable, + 1, 1, attachments); reply = xcb_dri2_get_buffers_reply(scrn->conn, cookie, NULL); } if (!reply) @@ -241,32 +247,33 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable) template.bind = PIPE_BIND_RENDER_TARGET; template.flags = 0; - tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, &dri2_handle); + tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, + &dri2_handle); free(reply); return tex; } -struct u_rect * -vl_screen_get_dirty_area(struct vl_screen *vscreen) +static struct u_rect * +vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(scrn); return &scrn->dirty_areas[scrn->current_buffer]; } -uint64_t -vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable) +static uint64_t +vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; xcb_dri2_get_msc_cookie_t cookie; xcb_dri2_get_msc_reply_t *reply; assert(scrn); - vl_dri2_set_drawable(scrn, drawable); + vl_dri2_set_drawable(scrn, (Drawable)drawable); if (!scrn->last_ust) { - cookie = xcb_dri2_get_msc_unchecked(scrn->conn, drawable); + cookie = xcb_dri2_get_msc_unchecked(scrn->conn, (Drawable)drawable); reply = xcb_dri2_get_msc_reply(scrn->conn, cookie, NULL); if (reply) { @@ -278,19 +285,20 @@ 
vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable) return scrn->last_ust; } -void -vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp) +static void +vl_dri2_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(scrn); if (stamp && scrn->last_ust && scrn->ns_frame && scrn->last_msc) - scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / scrn->ns_frame + scrn->last_msc; + scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / + scrn->ns_frame + scrn->last_msc; else scrn->next_msc = 0; } -void* -vl_screen_get_private(struct vl_screen *vscreen) +static void * +vl_dri2_screen_get_private(struct vl_screen *vscreen) { return vscreen; } @@ -305,8 +313,8 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen) return NULL; } -struct vl_screen* -vl_screen_create(Display *display, int screen) +struct vl_screen * +vl_dri2_screen_create(Display *display, int screen) { struct vl_dri_screen *scrn; const xcb_query_extension_reply_t *extension; @@ -320,7 +328,7 @@ vl_screen_create(Display *display, int screen) xcb_generic_error_t *error = NULL; char *device_name; int fd, device_name_length; - unsigned int driverType; + unsigned driverType; drm_magic_t magic; @@ -340,7 +348,9 @@ vl_screen_create(Display *display, int screen) if (!(extension && extension->present)) goto free_screen; - dri2_query_cookie = xcb_dri2_query_version (scrn->conn, XCB_DRI2_MAJOR_VERSION, XCB_DRI2_MINOR_VERSION); + dri2_query_cookie = xcb_dri2_query_version (scrn->conn, + XCB_DRI2_MAJOR_VERSION, + XCB_DRI2_MINOR_VERSION); dri2_query = xcb_dri2_query_version_reply (scrn->conn, dri2_query_cookie, &error); if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2) goto free_query; @@ -352,7 +362,7 @@ vl_screen_create(Display *display, int screen) { char *prime = 
getenv("DRI_PRIME"); if (prime) { - unsigned int primeid; + unsigned primeid; errno = 0; primeid = strtoul(prime, NULL, 0); if (errno == 0) @@ -362,9 +372,12 @@ vl_screen_create(Display *display, int screen) } #endif - connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType); + connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, + get_xcb_screen(s, screen)->root, + driverType); connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL); - if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0) + if (connect == NULL || + connect->driver_name_length + connect->device_name_length == 0) goto free_connect; device_name_length = xcb_dri2_connect_device_name_length(connect); @@ -381,22 +394,26 @@ vl_screen_create(Display *display, int screen) if (drmGetMagic(fd, &magic)) goto free_connect; - authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic); + authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, + get_xcb_screen(s, screen)->root, + magic); authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL); if (authenticate == NULL || !authenticate->authenticated) goto free_authenticate; -#if GALLIUM_STATIC_TARGETS - scrn->base.pscreen = dd_create_screen(fd); -#else - if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd)) - scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR); -#endif // GALLIUM_STATIC_TARGETS + if (pipe_loader_drm_probe_fd(&scrn->base.dev, dup(fd))) + scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev); if (!scrn->base.pscreen) goto release_pipe; + scrn->base.destroy = vl_dri2_screen_destroy; + scrn->base.texture_from_drawable = vl_dri2_screen_texture_from_drawable; + scrn->base.get_dirty_area = vl_dri2_screen_get_dirty_area; + scrn->base.get_timestamp = vl_dri2_screen_get_timestamp; + scrn->base.set_next_timestamp = vl_dri2_screen_set_next_timestamp; + 
scrn->base.get_private = vl_dri2_screen_get_private; scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer; vl_compositor_reset_dirty_area(&scrn->dirty_areas[0]); vl_compositor_reset_dirty_area(&scrn->dirty_areas[1]); @@ -409,10 +426,8 @@ vl_screen_create(Display *display, int screen) return &scrn->base; release_pipe: -#if !GALLIUM_STATIC_TARGETS if (scrn->base.dev) pipe_loader_release(&scrn->base.dev, 1); -#endif // !GALLIUM_STATIC_TARGETS free_authenticate: free(authenticate); free_connect: @@ -426,9 +441,10 @@ free_screen: return NULL; } -void vl_screen_destroy(struct vl_screen *vscreen) +static void +vl_dri2_screen_destroy(struct vl_screen *vscreen) { - struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen; + struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen; assert(vscreen); @@ -440,8 +456,6 @@ void vl_screen_destroy(struct vl_screen *vscreen) vl_dri2_destroy_drawable(scrn); scrn->base.pscreen->destroy(scrn->base.pscreen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&scrn->base.dev, 1); -#endif // !GALLIUM_STATIC_TARGETS FREE(scrn); } diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c index 1167fcf6a90..f993e2c7727 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_drm.c +++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c @@ -34,7 +34,10 @@ #include "util/u_memory.h" #include "vl/vl_winsys.h" -struct vl_screen* +static void +vl_drm_screen_destroy(struct vl_screen *vscreen); + +struct vl_screen * vl_drm_screen_create(int fd) { struct vl_screen *vscreen; @@ -43,35 +46,34 @@ vl_drm_screen_create(int fd) if (!vscreen) return NULL; -#if GALLIUM_STATIC_TARGETS - vscreen->pscreen = dd_create_screen(fd); -#else - if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) { - vscreen->pscreen = - pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR); - if (!vscreen->pscreen) - pipe_loader_release(&vscreen->dev, 1); - } -#endif + if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) + vscreen->pscreen 
= pipe_loader_create_screen(vscreen->dev); - if (!vscreen->pscreen) { - FREE(vscreen); - return NULL; - } + if (!vscreen->pscreen) + goto error; + vscreen->destroy = vl_drm_screen_destroy; + vscreen->texture_from_drawable = NULL; + vscreen->get_dirty_area = NULL; + vscreen->get_timestamp = NULL; + vscreen->set_next_timestamp = NULL; + vscreen->get_private = NULL; return vscreen; + +error: + if (vscreen->dev) + pipe_loader_release(&vscreen->dev, 1); + + FREE(vscreen); + return NULL; } -void +static void vl_drm_screen_destroy(struct vl_screen *vscreen) { assert(vscreen); vscreen->pscreen->destroy(vscreen->pscreen); - -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&vscreen->dev, 1); -#endif - FREE(vscreen); } diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index ef235734755..77f708f449c 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git 
a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index b5e1ddadde0..a6940dfefea 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -111,10 +111,14 @@ enum a3xx_vtx_fmt { VFMT_8_8_SNORM = 53, VFMT_8_8_8_SNORM = 54, VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 60, - VFMT_10_10_10_2_UNORM = 61, - VFMT_10_10_10_2_SINT = 62, - VFMT_10_10_10_2_SNORM = 63, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, }; enum a3xx_tex_fmt { @@ -138,10 +142,12 @@ enum a3xx_tex_fmt { TFMT_DXT1 = 36, TFMT_DXT3 = 37, TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, TFMT_10_10_10_2_UNORM = 41, TFMT_9_9_9_E5_FLOAT = 42, TFMT_11_11_10_FLOAT = 43, TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, TFMT_L8_A8_UNORM = 47, TFMT_8_UNORM = 48, 
TFMT_8_8_UNORM = 49, @@ -183,6 +189,8 @@ enum a3xx_tex_fmt { TFMT_32_SINT = 92, TFMT_32_32_SINT = 93, TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, TFMT_ETC2_RG11_SNORM = 112, TFMT_ETC2_RG11_UNORM = 113, TFMT_ETC2_R11_SNORM = 114, @@ -215,6 +223,9 @@ enum a3xx_color_fmt { RB_R8_UINT = 14, RB_R8_SINT = 15, RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, RB_A8_UNORM = 20, RB_R8_UNORM = 21, RB_R16_FLOAT = 24, @@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select { SP_ALU_ACTIVE_CYCLES = 29, }; -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, -}; - enum a3xx_rb_blend_opcode { BLEND_DST_PLUS_SRC = 0, BLEND_SRC_MINUS_DST = 1, @@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) } #define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) { return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; } +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & 
A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 3906c9b996e..b8a31d84b3f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ if (ctx->rasterizer->point_size_per_vertex && + fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 8f9c8b0623c..24afbc9e956 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, fd3_pipe_sampler_view(tex->textures[i]) : &dummy_view; struct fd_resource *rsc = fd_resource(view->base.texture); - unsigned start = fd_sampler_first_level(&view->base); - unsigned end = fd_sampler_last_level(&view->base);; + if (rsc && rsc->base.b.target == PIPE_BUFFER) { + OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element * + util_format_get_blocksize(view->base.format), 0, 0); + j = 1; + } else { + unsigned start = fd_sampler_first_level(&view->base); + unsigned end = fd_sampler_last_level(&view->base);; - for (j = 0; j < (end - start + 1); j++) { - struct fd_resource_slice *slice = + for (j = 0; j < (end - start + 1); j++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, j + start); - OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + } } /* pad the 
remaining entries w/ null: */ @@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2); + OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | + A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) | + A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + + OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | + A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) | + A3XX_VFD_DECODE_INSTR_SWAP(XYZW) | + 
A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | A3XX_VFD_CONTROL_0_PACKETSIZE(2) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 857d156c869..52ea9444517 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), @@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(DXT3_SRGBA, DXT3, NONE, WZYX), _T(DXT5_RGBA, DXT5, NONE, WZYX), _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + /* faked */ + _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), }; enum a3xx_vtx_fmt @@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format) 
{ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; + else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format) } } +unsigned +fd3_pipe2nblocksx(enum pipe_format format, unsigned width) +{ + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + return util_format_get_nblocksx(format, width); +} + /* we need to special case a bit the depth/stencil restore, because we are * using the texture sampler to blit into the depth/stencil buffer, *not* * into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 05c5ea3d247..48c503e9a82 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); enum pipe_format fd3_gmem_restore_format(enum pipe_format format); enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); +unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 2d6ecb2c050..99ae99ea0c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd3_pipe_sampler_view 
*so = CALLOC_STRUCT(fd3_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl; uint32_t sz2 = 0; if (!so) @@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->texconst0 = A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | - A3XX_TEX_CONST_0_MIPLVLS(miplevels) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A3XX_TEX_CONST_0_SRGB; - so->texconst1 = + if (prsc->target == PIPE_BUFFER) { + lvl = 0; + so->texconst1 = + A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | + A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element - + cso->u.buf.first_element + 1) | + A3XX_TEX_CONST_1_HEIGHT(1); + } else { + unsigned miplevels; + + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + + so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + } /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = - A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 9f970365464..a450379e98d 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -47,11 +47,13 @@ enum a4xx_color_fmt { RB4_R8_UNORM = 2, RB4_R4G4B4A4_UNORM = 8, RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6R5_UNORM = 14, + RB4_R5G6B5_UNORM = 14, RB4_R8G8_UNORM = 15, RB4_R8G8_SNORM = 16, RB4_R8G8_UINT = 17, RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, RB4_R16_FLOAT = 21, RB4_R16_UINT = 22, RB4_R16_SINT = 23, @@ -63,12 +65,16 @@ enum a4xx_color_fmt { RB4_R10G10B10A2_UNORM = 31, RB4_R10G10B10A2_UINT = 34, RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, RB4_R16G16_FLOAT = 42, RB4_R16G16_UINT = 43, RB4_R16G16_SINT = 44, RB4_R32_FLOAT = 45, RB4_R32_UINT = 46, RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, RB4_R16G16B16A16_FLOAT = 54, RB4_R16G16B16A16_UINT = 55, RB4_R16G16B16A16_SINT = 56, @@ -106,6 +112,7 @@ enum a4xx_vtx_fmt { VFMT4_32_32_FIXED = 10, VFMT4_32_32_32_FIXED = 11, VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, VFMT4_16_SINT = 16, VFMT4_16_16_SINT = 17, VFMT4_16_16_16_SINT = 18, @@ -146,18 +153,19 @@ enum a4xx_vtx_fmt { VFMT4_8_8_SNORM = 53, VFMT4_8_8_8_SNORM = 54, 
VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 60, - VFMT4_10_10_10_2_UNORM = 61, - VFMT4_10_10_10_2_SINT = 62, - VFMT4_10_10_10_2_SNORM = 63, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, }; enum a4xx_tex_fmt { TFMT4_5_6_5_UNORM = 11, - TFMT4_5_5_5_1_UNORM = 10, + TFMT4_5_5_5_1_UNORM = 9, TFMT4_4_4_4_4_UNORM = 8, TFMT4_X8Z24_UNORM = 71, TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, TFMT4_A8_UNORM = 3, TFMT4_L8_A8_UNORM = 13, TFMT4_8_UNORM = 4, @@ -172,6 +180,12 @@ enum a4xx_tex_fmt { TFMT4_8_SINT = 7, TFMT4_8_8_SINT = 17, TFMT4_8_8_8_8_SINT = 31, + TFMT4_16_UNORM = 18, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_SNORM = 19, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_16_16_SNORM = 52, TFMT4_16_UINT = 21, TFMT4_16_16_UINT = 41, TFMT4_16_16_16_16_UINT = 54, @@ -190,8 +204,21 @@ enum a4xx_tex_fmt { TFMT4_32_FLOAT = 43, TFMT4_32_32_FLOAT = 56, TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, TFMT4_9_9_9_E5_FLOAT = 32, TFMT4_11_11_10_FLOAT = 37, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, TFMT4_ATC_RGB = 100, TFMT4_ATC_RGBA_EXPLICIT = 101, TFMT4_ATC_RGBA_INTERPOLATED = 102, @@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 #define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 #define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400 -#define A4XX_RB_MRT_CONTROL_B11 0x00000800 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t 
A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) @@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) { return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; } -#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100 +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) @@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 @@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 -#define REG_A4XX_UNKNOWN_21C5 0x000021c5 +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline 
uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 #define REG_A4XX_PC_RESTART_INDEX 0x000021c6 @@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) { return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; } +#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} #define REG_A4XX_TEX_SAMP_1 0x00000001 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e @@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val { return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; } +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 #define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 @@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) { return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; } -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000 +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 #define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index d5e823ef69d..f19702280e0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -27,6 +27,7 @@ */ #include "pipe/p_state.h" +#include "util/u_blend.h" #include "util/u_string.h" #include 
"util/u_memory.h" @@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx, const struct pipe_blend_state *cso) { struct fd4_blend_stateobj *so; -// enum a3xx_rop_code rop = ROP_COPY; + enum a3xx_rop_code rop = ROP_COPY; bool reads_dest = false; unsigned i, mrt_blend = 0; if (cso->logicop_enable) { -// rop = cso->logicop_func; /* maps 1:1 */ + rop = cso->logicop_func; /* maps 1:1 */ switch (cso->logicop_func) { case PIPE_LOGICOP_NOR: @@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx, else rt = &cso->rt[0]; - so->rb_mrt[i].blend_control = + so->rb_mrt[i].blend_control_rgb = A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | - A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + so->rb_mrt[i].blend_control_no_alpha_rgb = + A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + so->rb_mrt[i].control = - 0xc00 | /* XXX ROP_CODE ?? 
*/ + A4XX_RB_MRT_CONTROL_ROP_CODE(rop) | + COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); if (rt->blend_enable) { @@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx, mrt_blend |= (1 << i); } - if (reads_dest) + if (reads_dest) { so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } if (cso->dither) so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); } - so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend); + so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND); return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h index 7620d00a625..6230fa7a50e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h @@ -39,7 +39,12 @@ struct fd4_blend_stateobj { struct { uint32_t control; uint32_t buf_info; - uint32_t blend_control; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; } rb_mrt[A4XX_MAX_RENDER_TARGETS]; uint32_t rb_fs_output; }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 7bd5163529a..8cbe68d5790 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit))) return; @@ -64,7 +65,14 @@ draw_impl(struct 
fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ + if (ctx->rasterizer->point_size_per_vertex && + fd4_emit_get_vp(emit)->writes_psize && + (info->mode == PIPE_PRIM_POINTS)) + primtype = DI_PT_POINTLIST_PSIZE; + fd4_draw_emit(ctx, ring, + primtype, emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info); } @@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index b89a30a7c4b..a6c56404a8a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size) } static inline void fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info) { struct pipe_index_buffer *idx = &ctx->indexbuf; struct fd_bo *idx_bo = NULL; - enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; @@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - /* points + psize -> spritelist: */ - if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && - (info->mode == PIPE_PRIM_POINTS)) - primtype = DI_PT_POINTLIST_PSIZE; - fd4_draw(ctx, ring, primtype, vismode, src_sel, info->count, info->instance_count, idx_type, idx_size, 
idx_offset, idx_bo); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 26b58718cd8..f220fc7ac1f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct fd4_pipe_sampler_view *view = tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i]) : &dummy_view; - unsigned start = fd_sampler_first_level(&view->base); OUT_RING(ring, view->texconst0); OUT_RING(ring, view->texconst1); @@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); - uint32_t offset = fd_resource_offset(rsc, start, 0); - OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x00000000); } @@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format))); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); OUT_RING(ring, 0x00000000); @@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer 
*ring, struct fd4_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1)); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1)); + + OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL | + A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) | + A4XX_VFD_DECODE_INSTR_SWAP(XYZW) | + A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A4XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5); OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | 0xa0000 | /* XXX */ @@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RINGP(ring, val, 
&fd4_context(ctx)->rbrc_patches); } - if (dirty & FD_DIRTY_ZSA) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST; OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); - OUT_RING(ring, zsa->rb_alpha_control); + OUT_RING(ring, rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); @@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, */ if (emit->info) { const struct pipe_draw_info *info = emit->info; - uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) - ->pc_prim_vtx_cntl; + struct fd4_rasterizer_stateobj *rast = + fd4_rasterizer_stateobj(ctx->rasterizer); + uint32_t val = rast->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; @@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); - OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ + OUT_RING(ring, rast->pc_prim_vtx_cntl2); } if (dirty & FD_DIRTY_SCISSOR) { @@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) { + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); } @@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format( + ctx->framebuffer.cbufs[i]); + bool is_int = 
util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A4XX_RB_MRT_CONTROL_BLEND2; + } + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].control); + OUT_RING(ring, control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].blend_control); + OUT_RING(ring, blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); @@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } - if (dirty & FD_DIRTY_BLEND_COLOR) { + if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) { struct pipe_blend_color *bcolor = &ctx->blend_color; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + float factor = 65535.0; + int i; + + for (i = 0; i < pfb->nr_cbufs; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + const struct util_format_description *desc = + util_format_description(format); + int j; + + if (desc->is_mixed) + continue; + + j = util_format_get_first_non_void_channel(format); + if (j == -1) + continue; + + if (desc->channel[j].size > 8 || !desc->channel[j].normalized || + desc->channel[j].pure_integer) + continue; + + /* Just use the first unorm8/snorm8 render buffer. Can't keep + * everyone happy. 
+ */ + if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED) + factor = 32767.0; + break; + } + OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8); - OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0])); - OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1])); - OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2])); - OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 847d4fb6d63..c240745cec1 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), /* 16-bit */ - V_(R16_UNORM, 16_UNORM, NONE, WZYX), - V_(R16_SNORM, 16_SNORM, NONE, WZYX), - VT(R16_UINT, 16_UINT, R16_UINT, WZYX), - VT(R16_SINT, 16_SINT, R16_SINT, WZYX), - V_(R16_USCALED, 16_UINT, NONE, WZYX), - V_(R16_SSCALED, 16_UINT, NONE, WZYX), - VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX), - - _T(A16_UINT, 16_UINT, NONE, WZYX), - _T(A16_SINT, 16_SINT, NONE, WZYX), - _T(L16_UINT, 16_UINT, NONE, WZYX), - _T(L16_SINT, 16_SINT, NONE, WZYX), - _T(I16_UINT, 16_UINT, NONE, WZYX), - _T(I16_SINT, 16_SINT, 
NONE, WZYX), + VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), + VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_UINT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + + _T(A16_UNORM, 16_UNORM, NONE, WZYX), + _T(A16_SNORM, 16_SNORM, NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UNORM, 16_UNORM, NONE, WZYX), + _T(L16_SNORM, 16_SNORM, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UNORM, 16_UNORM, NONE, WZYX), + _T(I16_SNORM, 16_SNORM, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), @@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), @@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(I32_UINT, 32_UINT, NONE, WZYX), _T(I32_SINT, 32_SINT, NONE, WZYX), - V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX), - V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX), - VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), - VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), - V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), - V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), - VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX), + VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), + VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + V_(R16G16_SSCALED, 16_16_SINT, 
NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), - _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), - _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), + _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), @@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), - V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), + VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), + VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), - _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), @@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), /* 64-bit */ - V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX), - V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX), + VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), 
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), @@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), /* 96-bit */ - V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), - V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), - V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), /* 128-bit */ @@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ + _T(ETC1_RGB8, ETC1, NONE, WZYX), + _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), + _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), + _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), + _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), + _T(BPTC_SRGBA, BPTC, NONE, WZYX), + _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), + _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + + _T(RGTC1_UNORM, RGTC1_UNORM, 
NONE, WZYX), + _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), + _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + + _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + + _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), }; /* convert pipe format to vertex buffer format: */ @@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format) if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; - switch (util_format_get_blocksizebits(format)) { + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH4_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH4_1_BYTE; case 16: return TFETCH4_2_BYTE; 
case 32: return TFETCH4_4_BYTE; case 64: return TFETCH4_8_BYTE; + case 96: return TFETCH4_1_BYTE; /* Does this matter? */ case 128: return TFETCH4_16_BYTE; default: debug_printf("Unknown block size for format %s: %d\n", diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 3f8bbf3a124..221608127b4 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index e3d5dabab4c..3df13543148 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* adjust regids for alpha output formats. there is no alpha render - * format, so it's just treated like red - */ - for (i = 0; i < nr; i++) - if (util_format_is_alpha(pipe_surface_format(bufs[i]))) - color_regid[i] += 3; - /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? 
regid(0,0) : regid(63,0); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c index dc7e98b149d..7456c63febe 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, so->gras_su_mode_control = A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); + so->pc_prim_vtx_cntl2 = + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE; if (cso->cull_face & PIPE_FACE_FRONT) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; @@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, if (cso->offset_tri) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + if (!cso->depth_clip) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; + if (cso->clip_halfz) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z; + return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h index 64e81a9983b..b56a04da6a8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h @@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj { uint32_t gras_su_mode_control; uint32_t gras_cl_clip_cntl; uint32_t pc_prim_vtx_cntl; + uint32_t pc_prim_vtx_cntl2; }; static inline struct fd4_rasterizer_stateobj * diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index d8ea414f300..b2a69cca56c 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ 
b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen, } if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (target == PIPE_BUFFER || + util_format_get_blocksize(format) != 12) && (fd4_pipe2tex(format) != ~0)) { retval |= PIPE_BIND_SAMPLER_VIEW; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index dbff5a738fd..0eba75577b0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx, so->texsamp1 = // COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | + COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS); if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); @@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl, layers; + uint32_t sz2 = 0; if (!so) return NULL; @@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->base.context = pctx; so->texconst0 = - A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | + A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) | A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) | - A4XX_TEX_CONST_0_MIPLVLS(miplevels) | fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A4XX_TEX_CONST_0_SRGB; - so->texconst1 = - 
A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | - A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); - so->texconst2 = - A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | - A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp); + if (cso->target == PIPE_BUFFER) { + unsigned elements = cso->u.buf.last_element - + cso->u.buf.first_element + 1; + lvl = 0; + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(elements) | + A4XX_TEX_CONST_1_HEIGHT(1); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + so->offset = cso->u.buf.first_element * + util_format_get_blocksize(cso->format); + } else { + unsigned miplevels; - switch (prsc->target) { + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; + + so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | + A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH( + util_format_get_nblocksx( + cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); + } + + switch (cso->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size) | + A4XX_TEX_CONST_3_DEPTH(layers) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) | + A4XX_TEX_CONST_3_DEPTH(layers / 6) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_3D: so->texconst3 = A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | - A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0); + A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0); + while (lvl < 
cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) + sz2 = rsc->slices[++lvl].size0; + so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2); break; default: so->texconst3 = 0x00000000; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h index 31955770a85..6ca34ade60d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp) struct fd4_pipe_sampler_view { struct pipe_sampler_view base; - uint32_t texconst0, texconst1, texconst2, texconst3, textconst4; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; + uint32_t offset; }; static inline struct fd4_pipe_sampler_view * diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index ca3d2ac3fca..0e0f0e65e9b 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 
2013-2015 by the following authors: @@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode { RB_COPY_DEPTH_STENCIL = 5, }; +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_XOR = 6, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_COPY = 12, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + enum a3xx_render_mode { RB_RENDERING_PASS = 0, RB_TILING_PASS = 1, diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index f095e3061b2..4aabc086607 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 61c4c6d6e24..571c8142bf7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ 
b/src/gallium/drivers/freedreno/freedreno_context.h @@ -359,6 +359,10 @@ struct fd_context { struct fd_streamout_stateobj streamout; struct pipe_clip_state ucp; + struct pipe_query *cond_query; + bool cond_cond; /* inverted rendering condition */ + uint cond_mode; + /* GMEM/tile handling fxns: */ void (*emit_tile_init)(struct fd_context *ctx); void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 7bf3343f43a..bf803cc77bc 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) return; } + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* emulate unsupported primitives: */ if (!fd_supported_prim(ctx, info->mode)) { if (ctx->streamout.num_targets > 0) @@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, unsigned cleared_buffers; int i; + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* for bookkeeping about which buffers have been cleared (and thus * can fully or partially skip mem2gmem) we need to ignore buffers * that have already had a draw, in case apps do silly things like diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index db2683c9b6f..b87e8250719 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, return q->funcs->get_query_result(fd_context(pctx), q, wait, result); } +static void +fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq, + boolean condition, uint mode) +{ + struct fd_context *ctx = fd_context(pctx); 
+ ctx->cond_query = pq; + ctx->cond_cond = condition; + ctx->cond_mode = mode; +} + static int fd_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) @@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx) pctx->begin_query = fd_begin_query; pctx->end_query = fd_end_query; pctx->get_query_result = fd_get_query_result; + pctx->render_condition = fd_render_condition; } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 98de0969cab..63ca9e30620 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_rgtc.h" #include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) util_range_set_empty(&rsc->valid_buffer_range); } -/* Currently this is only used for flushing Z32_S8 texture transfers, but - * eventually it should handle everything. 
- */ +static unsigned +fd_resource_layer_offset(struct fd_resource *rsc, + struct fd_resource_slice *slice, + unsigned layer) +{ + if (rsc->layer_first) + return layer * rsc->layer_size; + else + return layer * slice->size0; +} + static void -fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(trans->base.resource); struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); @@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) enum pipe_format format = trans->base.resource->format; float *depth = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; - assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || - format == PIPE_FORMAT_X32_S8X24_UINT); - if (format != PIPE_FORMAT_X32_S8X24_UINT) util_format_z32_float_s8x24_uint_unpack_z_float( depth, slice->pitch * 4, @@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) box->width, box->height); } +static void +fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + + ((trans->base.box.y + box->y) * slice->pitch + + trans->base.box.x + box->x) * rsc->cpp; + + uint8_t *source = trans->staging + + 
util_format_get_nblocksy(format, box->y) * trans->base.stride + + util_format_get_stride(format, box->x); + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + default: + assert(!"Unexpected format\n"); + break; + } +} + +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + enum pipe_format format = trans->base.resource->format; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + fd_resource_flush_z32s8(trans, box); + break; + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + fd_resource_flush_rgtc(trans, box); + break; + default: + assert(!"Unexpected staging transfer type"); + break; + } +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - if (rsc->layer_first) { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * rsc->layer_size; - } else { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp 
+ - box->z * slice->size0; - } + offset = slice->offset + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp + + fd_resource_layer_offset(rsc, slice, box->z); if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + assert(trans->base.box.depth == 1); + trans->base.stride = trans->base.box.width * rsc->cpp * 2; trans->staging = malloc(trans->base.stride * trans->base.box.height); if (!trans->staging) @@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx, goto fail; float *depth = (float *)(buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + box->y * slice->pitch * 4 + box->x * 4); uint8_t *stencil = sbuf + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, box->z) + box->y * sslice->pitch + box->x; if (format != PIPE_FORMAT_X32_S8X24_UINT) @@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx, buf = trans->staging; offset = 0; + } else if (rsc->internal_format != format && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) { + assert(trans->base.box.depth == 1); + + trans->base.stride = util_format_get_stride( + format, trans->base.box.width); + trans->staging = malloc( + util_format_get_2d_size(format, trans->base.stride, + trans->base.box.height)); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. 
+ */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + uint8_t *rgba8 = (uint8_t *)buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + + box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + default: + assert(!"Unexpected format"); + break; + } + } + + buf = trans->staging; + offset = 0; } *pptrans = ptrans; @@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = { }; static uint32_t -setup_slices(struct fd_resource *rsc, uint32_t alignment) +setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) { struct pipe_resource *prsc = &rsc->base.b; + enum util_format_layout layout = util_format_description(format)->layout; uint32_t level, size = 0; uint32_t width = prsc->width0; uint32_t height = prsc->height0; @@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) struct fd_resource_slice *slice = fd_resource_slice(rsc, level); uint32_t blocks; - slice->pitch = width = align(width, 32); + if (layout == UTIL_FORMAT_LAYOUT_ASTC) + slice->pitch = width = + util_align_npot(width, 32 * util_format_get_blockwidth(format)); + else + slice->pitch = width = align(width, 32); slice->offset = size; - blocks = util_format_get_nblocks(prsc->format, width, height); + blocks = util_format_get_nblocks(format, width, height); /* 1d array and 2d array textures must all have the same 
layer size * for each miplevel on a3xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least @@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen, { struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct pipe_resource *prsc = &rsc->base.b; - uint32_t size; + enum pipe_format format = tmpl->format; + uint32_t size, alignment; DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", - tmpl->target, util_format_name(tmpl->format), + tmpl->target, util_format_name(format), tmpl->width0, tmpl->height0, tmpl->depth0, tmpl->array_size, tmpl->last_level, tmpl->nr_samples, tmpl->usage, tmpl->bind, tmpl->flags); @@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); - else - rsc->cpp = util_format_get_blocksize(tmpl->format); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + else if (fd_screen(pscreen)->gpu_id < 400 && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + rsc->internal_format = format; + rsc->cpp = util_format_get_blocksize(format); assert(rsc->cpp); + alignment = slice_alignment(pscreen, tmpl); if (is_a4xx(fd_screen(pscreen))) { switch (tmpl->target) { case PIPE_TEXTURE_3D: @@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen, break; default: rsc->layer_first = true; + alignment = 1; break; } } - size = setup_slices(rsc, slice_alignment(pscreen, tmpl)); + size = setup_slices(rsc, alignment, format); if (rsc->layer_first) { rsc->layer_size = align(size, 4096); @@ -548,7 +680,7 @@ fail: return NULL; } -static void fd_blitter_pipe_begin(struct fd_context *ctx); +static void fd_blitter_pipe_begin(struct 
fd_context *ctx, bool render_cond); static void fd_blitter_pipe_end(struct fd_context *ctx); /** @@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx, if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) return false; - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, false); util_blitter_copy_texture(ctx->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); @@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx, src, src_level, src_box); } +bool +fd_render_condition_check(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + if (!ctx->cond_query) + return true; + + union pipe_query_result res = { 0 }; + bool wait = + ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && + ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + + if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res)) + return (bool)res.u64 != ctx->cond_cond; + + return true; +} + /** * Optimal hardware path for blitting pixels. * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
@@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } + if (info.render_condition_enable && !fd_render_condition_check(pctx)) + return; + if (util_try_blit_via_copy_region(pctx, &info)) { return; /* done */ } @@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, info.render_condition_enable); util_blitter_blit(ctx->blitter, &info); fd_blitter_pipe_end(ctx); } static void -fd_blitter_pipe_begin(struct fd_context *ctx) +fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond) { util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb); util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx); @@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx) (void **)ctx->fragtex.samplers); util_blitter_save_fragment_sampler_views(ctx->blitter, ctx->fragtex.num_textures, ctx->fragtex.textures); + if (!render_cond) + util_blitter_save_render_condition(ctx->blitter, + ctx->cond_query, ctx->cond_cond, ctx->cond_mode); fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT); } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 7549becaa1f..9a9b0d08244 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -73,6 +73,7 @@ struct fd_resource { struct u_resource base; struct fd_bo *bo; uint32_t cpp; + enum pipe_format internal_format; bool layer_first; /* see above description */ uint32_t layer_size; struct fd_resource_slice slices[MAX_MIP_LEVELS]; @@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer) void fd_resource_screen_init(struct pipe_screen *pscreen); void fd_resource_context_init(struct pipe_context *pctx); +bool fd_render_condition_check(struct pipe_context *pctx); + #endif /* FREEDRENO_RESOURCE_H_ */ 
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 56d1834ef9c..5bbe4016a2a 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_START_INSTANCE: case PIPE_CAP_COMPUTE: return 0; @@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_CLIP_HALFZ: return is_a3xx(screen) || is_a4xx(screen); case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - /* ignoring first/last_element.. but I guess that should be - * easy to add.. - */ + if (is_a3xx(screen)) return 16; + if (is_a4xx(screen)) return 32; return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - /* I think 32k on a4xx.. and we could possibly emulate more - * by pretending 2d/rect textures and splitting high bits - * of index into 2nd dimension.. + /* We could possibly emulate more by pretending 2d/rect textures and + * splitting high bits of index into 2nd dimension.. 
*/ - return 16383; - - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - return is_a3xx(screen); + if (is_a3xx(screen)) return 8192; + if (is_a4xx(screen)) return 16384; + return 0; case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_QUERY_LOD: return is_a4xx(screen); case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: if (glsl120) return 120; - return is_ir3(screen) ? 130 : 120; + return is_ir3(screen) ? 140 : 120; /* Unsupported features. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: - case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 04e4643b4c9..f5611abaec8 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr, continue; const struct util_format_channel_description *chan = - &desc->channel[desc->swizzle[j]]; - int size = chan->size; - - /* The Z16 texture format we use seems to look in the - * 32-bit border color slots 
- */ - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) - size = 32; - - /* Formats like R11G11B10 or RGB9_E5 don't specify - * per-channel sizes properly. - */ - if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) - size = 16; - - if (chan->pure_integer && size > 16) - bcolor32[desc->swizzle[j] + 4] = - sampler->border_color.i[j]; - else if (size > 16) - bcolor32[desc->swizzle[j]] = - fui(sampler->border_color.f[j]); - else if (chan->pure_integer) - bcolor[desc->swizzle[j] + 8] = - sampler->border_color.i[j]; - else + &desc->channel[desc->swizzle[j]]; + if (chan->pure_integer) { + bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j]; + bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j]; + } else { + bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]); bcolor[desc->swizzle[j]] = - util_float_to_half(sampler->border_color.f[j]); + util_float_to_half(sampler->border_color.f[j]); + } } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 157dc73a3c6..156bb0be247 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0); break; + case nir_op_bit_count: + dst[0] = ir3_CBITS_B(b, src[0], 0); + break; + case nir_op_ifind_msb: { + struct ir3_instruction *cmp; + dst[0] = ir3_CLZ_S(b, src[0], 0); + cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); + cmp->cat2.condition = IR3_COND_GE; + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + cmp, 0, dst[0], 0); + break; + } + case nir_op_ufind_msb: + dst[0] = ir3_CLZ_B(b, src[0], 0); + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + src[0], 0, dst[0], 0); + break; + case nir_op_find_lsb: + dst[0] = ir3_BFREV_B(b, src[0], 0); + dst[0] = ir3_CLZ_B(b, dst[0], 0); + 
break; + case nir_op_bitfield_reverse: + dst[0] = ir3_BFREV_B(b, src[0], 0); + break; + default: compile_error(ctx, "Unhandled ALU op: %s\n", nir_op_infos[alu->op].name); @@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) unreachable("bad sampler_dim"); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) flags |= IR3_INSTR_S; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) flags |= IR3_INSTR_A; *flagsp = flags; @@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_texop_txl: opc = OPC_SAML; break; case nir_texop_txd: opc = OPC_SAMGQ; break; case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_lod: opc = OPC_GETLOD; break; case nir_texop_txf_ms: case nir_texop_txs: - case nir_texop_lod: case nir_texop_tg4: case nir_texop_query_levels: case nir_texop_texture_samples: + case nir_texop_samples_identical: compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); return; } @@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) src0[nsrc0++] = create_immed(b, fui(0.5)); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) src0[nsrc0++] = compare; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) src0[nsrc0++] = coord[coords]; if (has_proj) { @@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_type_int: type = TYPE_S32; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: type = TYPE_U32; break; @@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) unreachable("bad dest_type"); } + if (opc == OPC_GETLOD) + type = TYPE_U32; + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, tex->sampler_index, tex->sampler_index, create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); split_dest(b, dst, sam, 4); + + /* GETLOD returns results in 4.8 fixed point */ + if (opc == OPC_GETLOD) { + struct 
ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); + + compile_assert(ctx, tex->dest_type == nir_type_float); + for (i = 0; i < 2; i++) { + dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, + factor, 0); + } + } } static void @@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr) case nir_texop_query_levels: emit_tex_query_levels(ctx, tex); break; + case nir_texop_samples_identical: + unreachable("nir_texop_samples_identical"); default: emit_tex(ctx, tex); break; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 7e2c27d9765..5d1cccb0daa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -166,7 +166,9 @@ struct ir3_shader_variant { } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_psize; - /* vertices/inputs: */ + /* attributes (VS) / varyings (FS): + * Note that sysval's should come *after* normal inputs. 
+ */ unsigned inputs_count; struct { uint8_t slot; @@ -229,7 +231,7 @@ struct ir3_shader { struct ir3_compiler *compiler; - struct pipe_context *pctx; + struct pipe_context *pctx; /* TODO replace w/ pipe_screen */ const struct tgsi_token *tokens; struct pipe_stream_output_info stream_output; diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 83f81135590..31a93659647 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -64,6 +64,8 @@ NV50_C_SOURCES := \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ nv50/nv50_blit.h \ + nv50/nv50_compute.c \ + nv50/nv50_compute.xml.h \ nv50/nv50_context.c \ nv50/nv50_context.h \ nv50/nv50_defs.xml.h \ @@ -76,6 +78,10 @@ NV50_C_SOURCES := \ nv50/nv50_query.h \ nv50/nv50_query_hw.c \ nv50/nv50_query_hw.h \ + nv50/nv50_query_hw_metric.c \ + nv50/nv50_query_hw_metric.h \ + nv50/nv50_query_hw_sm.c \ + nv50/nv50_query_hw_sm.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 2a13e1086a0..9f84de03a4a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_PFETCH: emitPFETCH(insn); break; + case OP_AFETCH: + emitAFETCH(insn); + break; case OP_EMIT: case OP_RESTART: emitOUT(insn); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 7859c8e79bd..41d2cc9167c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) Instruction *st; if (slot->reg.file == FILE_MEMORY_LOCAL) { - st = 
new_Instruction(func, OP_STORE, ty); - st->setSrc(0, slot); - st->setSrc(1, lval); lval->noSpill = 1; + if (ty != TYPE_B96) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + } else { + st = new_Instruction(func, OP_SPLIT, ty); + st->setSrc(0, lval); + for (int d = 0; d < lval->reg.size / 4; ++d) + st->setDef(d, new_LValue(func, FILE_GPR)); + + for (int d = lval->reg.size / 4 - 1; d >= 0; --d) { + Value *tmp = cloneShallow(func, slot); + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32); + s->setSrc(0, tmp); + s->setSrc(1, st->getDef(d)); + defi->bb->insertAfter(defi, s); + } + } } else { st = new_Instruction(func, OP_CVT, ty); st->setDef(0, slot); @@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) Instruction *ld; if (slot->reg.file == FILE_MEMORY_LOCAL) { lval->noSpill = 1; - ld = new_Instruction(func, OP_LOAD, ty); + if (ty != TYPE_B96) { + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_MERGE, ty); + for (int d = 0; d < lval->reg.size / 4; ++d) { + Value *tmp = cloneShallow(func, slot); + LValue *val; + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32); + l->setDef(0, (val = new_LValue(func, FILE_GPR))); + l->setSrc(0, tmp); + usei->bb->insertBefore(usei, l); + ld->setSrc(d, val); + val->noSpill = 1; + } + ld->setDef(0, lval); + usei->bb->insertBefore(usei, ld); + return lval; + } } else { ld = new_Instruction(func, OP_CVT, ty); } diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 68e69beb08f..1695553d793 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | 
PIPE_RESOURCE_FLAG_MAP_COHERENT)) { buffer->domain = NOUVEAU_BO_GART; - } else if (buffer->base.bind & - (screen->vidmem_bindings & screen->sysmem_bindings)) { + } else if (buffer->base.bind == 0 || (buffer->base.bind & + (screen->vidmem_bindings & screen->sysmem_bindings))) { switch (buffer->base.usage) { case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: @@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.bind & screen->sysmem_bindings) buffer->domain = NOUVEAU_BO_GART; } + /* There can be very special situations where we want non-gpu-mapped + * buffers, but never through this interface. + */ + assert(buffer->domain); ret = nouveau_buffer_allocate(screen, buffer, buffer->domain); if (ret == false) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c new file mode 100644 index 00000000000..6d23fd66945 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -0,0 +1,320 @@ +/* + * Copyright 2012 Francisco Jerez + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_compute.xml.h" + +#include "codegen/nv50_ir_driver.h" + +int +nv50_screen_compute_setup(struct nv50_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_device *dev = screen->base.device; + struct nouveau_object *chan = screen->base.channel; + struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; + unsigned obj_class; + int i, ret; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + obj_class = NV50_COMPUTE_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa3: + case 0xa5: + case 0xa8: + obj_class = NVA3_COMPUTE_CLASS; + break; + default: + obj_class = NV50_COMPUTE_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, + &screen->compute); + if (ret) + return ret; + + BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->handle); + + BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + PUSH_DATA 
(push, 0x100); + BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + PUSH_DATA (push, fifo->vram); + + for (i = 0; i < 15; i++) { + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } + + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + PUSH_DATA (push, ~0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + PUSH_DATA (push, 0x54); + BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + PUSH_DATA (push, fifo->vram); + + 
BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls_bo->offset + 65536); + PUSH_DATA (push, screen->tls_bo->offset + 65536); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); + + return 0; +} + +static bool +nv50_compute_validate_program(struct nv50_context *nv50) +{ + struct nv50_program *prog = nv50->compprog; + + if (prog->mem) + return true; + + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset, &nv50->base.debug); + if (!prog->translated) + return false; + } + if (unlikely(!prog->code_size)) + return false; + + if (likely(prog->code_size)) { + if (nv50_program_upload_code(nv50, prog)) { + struct nouveau_pushbuf *push = nv50->base.pushbuf; + BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); + return true; + } + } + return false; +} + +static void +nv50_compute_validate_globals(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + if (res) + nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, + nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static bool +nv50_compute_state_validate(struct nv50_context *nv50) +{ + if (!nv50_compute_validate_program(nv50)) + return false; + + if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) + nv50_compute_validate_globals(nv50); + + /* TODO: validate textures, samplers, surfaces */ + + nv50_bufctx_fence(nv50->bufctx_cp, false); + + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) + return false; + if (unlikely(nv50->state.flushed)) + nv50_bufctx_fence(nv50->bufctx_cp, true); + + return 
true; +} + +static void +nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = screen->base.pushbuf; + unsigned size = align(nv50->compprog->parm_size, 0x4); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, (size / 4) << 8); + + if (size) { + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bo = NULL; + unsigned offset; + + mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); + assert(mm); + + nouveau_bo_map(bo, 0, screen->base.client); + memcpy(bo->map + offset, input, size); + + nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + nouveau_pushbuf_data(push, bo, offset, size); + + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); + nouveau_bo_ref(NULL, &bo); + nouveau_bufctx_reset(nv50->bufctx, 0); + } +} + +static uint32_t +nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) +{ + struct nv50_program *prog = nv50->compprog; + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned i; + + for (i = 0; i < prog->cp.num_syms; ++i) { + if (syms[i].label == label) + return prog->code_base + syms[i].offset; + } + return prog->code_base; /* no symbols or symbol not found */ +} + +void +nv50_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, const void *input) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + struct nv50_program *cp = nv50->compprog; + bool ret; + + ret = !nv50_compute_state_validate(nv50); + if (ret) { + NOUVEAU_ERR("Failed to launch grid !\n"); + return; + } + + 
nv50_compute_upload_input(nv50, input); + + BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + + BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); + BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, cp->max_gpr); + + /* grid/block setup */ + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + PUSH_DATA (push, 1 << 16 | block_size); + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 1); + + /* kernel launching */ + BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + /* bind a compute shader clobbers fragment shader state */ + nv50->dirty |= NV50_NEW_FRAGPROG; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h new file mode 100644 index 00000000000..268d11253b6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h @@ -0,0 +1,444 @@ +#ifndef NV50_COMPUTE_XML +#define NV50_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36) +- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36) +- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33) +- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09) +- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59) + +Copyright (C) 2006-2015 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- 
sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NV50_COMPUTE_DMA_NOTIFY 0x00000180 + +#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0 + +#define NV50_COMPUTE_DMA_QUERY 0x000001a4 + +#define NV50_COMPUTE_DMA_LOCAL 0x000001b8 + +#define NV50_COMPUTE_DMA_STACK 0x000001bc + +#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0 + +#define NV50_COMPUTE_DMA_TSC 0x000001c4 + +#define NV50_COMPUTE_DMA_TIC 0x000001c8 + +#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc + +#define NV50_COMPUTE_UNK0200 0x00000200 +#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff +#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000 +#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16 + +#define NV50_COMPUTE_UNK0204 0x00000204 + +#define NV50_COMPUTE_UNK0208 0x00000208 + +#define NV50_COMPUTE_UNK020C 0x0000020c + +#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210 + +#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214 + +#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218 + +#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c + +#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220 + +#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224 + +#define NV50_COMPUTE_UNK0228 0x00000228 +#define NV50_COMPUTE_UNK0228_UNK0 0x00000001 +#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0 +#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4 +#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000 +#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12 + +#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c + +#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230 +#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_COMPUTE_TSC_LIMIT 0x00000234 +#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff + +#define NV50_COMPUTE_CB_ADDR 0x00000238 +#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8 +#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0)) +#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define 
NV50_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NV50_COMPUTE_TSC_FLUSH 0x0000027c +#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_TIC_FLUSH 0x00000280 +#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_DELAY1 0x00000284 + +#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288 + +#define NV50_COMPUTE_DELAY2 0x0000028c + +#define NV50_COMPUTE_UNK0290 0x00000290 + +#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294 + +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298 +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100 + +#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c + +#define NV50_COMPUTE_UNK02A0 0x000002a0 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8 + +#define NV50_COMPUTE_CB_DEF_SET 0x000002ac +#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_COMPUTE_UNK02B0 0x000002b0 + +#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4 +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16 + +#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8 + +#define NV50_COMPUTE_UNK02BC 0x000002bc +#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007 +#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070 +#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4 + +#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0 + +#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4 + +#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8 + +#define 
NV50_COMPUTE_TIC_LIMIT 0x000002cc + +#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004 + +#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24 + +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008 + +#define NV50_COMPUTE_UNK02F4 0x000002f4 + +#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8 + +#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc + +#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300 + +#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304 + +#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308 + +#define NV50_COMPUTE_UNK030C 0x0000030c + +#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310 + +#define 
NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314 + +#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318 + +#define NV50_COMPUTE_QUERY_GET 0x0000031c +#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200 +#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000 + +#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320 + +#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324 + +#define NV50_COMPUTE_COND_MODE 0x00000328 +#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_COMPUTE_UNK032C 0x0000032c + +#define NV50_COMPUTE_UNK0330 0x00000330 + +#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0334__LEN 0x00000003 + +#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0340__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0348__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0350__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0358 0x00000358 + +#define NV50_COMPUTE_UNK035C 0x0000035c + +#define NV50_COMPUTE_UNK0360 0x00000360 +#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0 +#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4 +#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00 +#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8 + +#define NV50_COMPUTE_UNK0364 0x00000364 + +#define NV50_COMPUTE_LAUNCH 0x00000368 + +#define NV50_COMPUTE_UNK036C 0x0000036c + +#define NV50_COMPUTE_UNK0370 0x00000370 + +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 
0 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040 + +#define NV50_COMPUTE_LINKED_TSC 0x00000378 + +#define NV50_COMPUTE_UNK037C 0x0000037c +#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001 +#define NV50_COMPUTE_UNK037C_UNK16 0x00010000 + +#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380 + +#define NV50_COMPUTE_UNK0384 0x00000384 + +#define NV50_COMPUTE_GRIDID 0x00000388 + +#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0)) +#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK038C__LEN 0x00000003 + +#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398 + +#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0)) +#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK039C__LEN 0x00000002 + +#define NV50_COMPUTE_GRIDDIM 0x000003a4 +#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff +#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0 +#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000 +#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16 + +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000 +#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040 + +#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16 + +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001 +#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040 + +#define NV50_COMPUTE_CP_START_ID 0x000003b4 + +#define NV50_COMPUTE_REG_MODE 0x000003b8 +#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001 +#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002 + +#define NV50_COMPUTE_TEX_LIMITS 0x000003bc +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define 
NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_COMPUTE_BIND_TSC 0x000003c0 +#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define NV50_COMPUTE_BIND_TIC 0x000003c4 +#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff + +#define NV50_COMPUTE_UNK03CC 0x000003cc + +#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_COMPUTE_UNK03D4 0x000003d4 + +#define NV50_COMPUTE_UNK03D8 0x000003d8 + +#define NV50_COMPUTE_UNK03DC 0x000003dc + +#define NV50_COMPUTE_UNK03E0 0x000003e0 + +#define NV50_COMPUTE_UNK03E4 0x000003e4 + +#define NVA3_COMPUTE_TEX_MISC 0x000003e8 +#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001 +#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020 +#define NV50_COMPUTE_GLOBAL__LEN 0x00000010 + 
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000 +#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100 + +#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8 + +#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0)) +#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004 +#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040 + +#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0700__LEN 0x00000010 + + +#endif /* NV50_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 7867c2df7f3..4874b77b1e1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50) nouveau_bufctx_del(&nv50->bufctx_3d); nouveau_bufctx_del(&nv50->bufctx); + nouveau_bufctx_del(&nv50->bufctx_cp); util_unreference_framebuffer_state(&nv50->framebuffer); @@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50) if (!nv50->constbuf[s][i].user) pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); } + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + 
util_dynarray_fini(&nv50->global_residents); } static void @@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nv50_context *nv50 = nv50_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { if (nv50->framebuffer.cbufs[i] && @@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nv50->framebuffer.zsbuf && nv50->framebuffer.zsbuf->texture == res) { nv50->dirty |= NV50_NEW_FRAMEBUFFER; @@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_SAMPLER_VIEW)) { assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); for (i = 0; i < nv50->num_vtxbufs; ++i) { @@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) nv50->base.pushbuf = screen->base.pushbuf; nv50->base.client = screen->base.client; - ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, - &nv50->bufctx_3d); + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT, + &nv50->bufctx_3d); if (!ret) - ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT, + &nv50->bufctx_cp); if (ret) goto out_err; @@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned 
ctxflags) pipe->draw_vbo = nv50_draw_vbo; pipe->clear = nv50_clear; + pipe->launch_grid = nv50_launch_grid; pipe->flush = nv50_flush; pipe->texture_barrier = nv50_texture_barrier; @@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + if (screen->compute) { + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo); + } flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); nv50->base.scratch.bo_size = 2 << 20; + util_dynarray_init(&nv50->global_residents); + return pipe; out_err: if (nv50->bufctx_3d) nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx_cp) + nouveau_bufctx_del(&nv50->bufctx_cp); if (nv50->bufctx) nouveau_bufctx_del(&nv50->bufctx); FREE(nv50->blit); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index fb74a9748a3..2cebcd99423 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -49,6 +49,10 @@ #define NV50_NEW_MIN_SAMPLES (1 << 22) #define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_CP_PROGRAM (1 << 0) +#define NV50_NEW_CP_GLOBALS (1 << 1) + +/* 3d bufctx (during draw_vbo, blit_3d) */ #define NV50_BIND_FB 0 #define NV50_BIND_VERTEX 1 #define NV50_BIND_VERTEX_TMP 2 @@ -58,7 +62,15 @@ #define NV50_BIND_SO 53 #define NV50_BIND_SCREEN 54 #define NV50_BIND_TLS 55 -#define NV50_BIND_COUNT 56 +#define NV50_BIND_3D_COUNT 56 + +/* compute bufctx (during launch_grid) */ 
+#define NV50_BIND_CP_GLOBAL 0 +#define NV50_BIND_CP_SCREEN 1 +#define NV50_BIND_CP_QUERY 2 +#define NV50_BIND_CP_COUNT 3 + +/* bufctx for other operations */ #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -101,8 +113,10 @@ struct nv50_context { struct nouveau_bufctx *bufctx_3d; struct nouveau_bufctx *bufctx; + struct nouveau_bufctx *bufctx_cp; uint32_t dirty; + uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; struct nv50_graph_state state; @@ -115,6 +129,7 @@ struct nv50_context { struct nv50_program *vertprog; struct nv50_program *gmtyprog; struct nv50_program *fragprog; + struct nv50_program *compprog; struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; @@ -163,6 +178,8 @@ struct nv50_context { uint32_t cond_condmode; /* the calculated condition */ struct nv50_blitctx *blit; + + struct util_dynarray global_residents; }; static inline struct nv50_context * @@ -302,4 +319,9 @@ struct pipe_video_buffer * nv98_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *template); +/* nv50_compute.c */ +void +nv50_launch_grid(struct pipe_context *, const uint *, const uint *, + uint32_t, const void *); + #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 89e7a338283..a4b8ddfda95 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - prog->vp.vertexid = 1; continue; default: break; @@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) return nv50_vertprog_assign_slots(info); case PIPE_SHADER_FRAGMENT: return nv50_fragprog_assign_slots(info); + case 
PIPE_SHADER_COMPUTE: + return 0; default: return -1; } @@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->gp.has_layer = 0; prog->gp.has_viewport = 0; + if (prog->type == PIPE_SHADER_COMPUTE) + info->prop.cp.inputOffset = 0x10; + info->driverPriv = prog; #ifdef DEBUG @@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; @@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; } prog->gp.vert_count = info->prop.gp.maxVertices; + } else + if (prog->type == PIPE_SHADER_COMPUTE) { + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; } if (prog->pipe.stream_output.num_outputs) @@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); + uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) return false; } } - prog->code_base = prog->mem->start; + + if (prog->type == PIPE_SHADER_COMPUTE) { + /* CP code must be uploaded in FP code segment. 
*/ + prog_type = 1; + } else { + prog->code_base = prog->mem->start; + prog_type = prog->type; + } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); if (ret < 0) { @@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); @@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) FREE(p->code); FREE(p->fixups); - + FREE(p->interps); FREE(p->so); memset(p, 0, sizeof(*p)); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 7a33eb11d6d..1de5122a56e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,9 +76,9 @@ struct nv50_program { ubyte psiz; /* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; - ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; + bool need_vertex_id; } vp; struct { @@ -98,6 +98,13 @@ struct nv50_program { ubyte viewportid; /* hw value of viewport index output */ } gp; + struct { + uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ + uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ + void *syms; + unsigned num_syms; + } cp; + void *fixups; /* relocation records */ void *interps; /* interpolation records */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c index f31eaa0e314..cbef95d07f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -24,6 +24,10 @@ struct push_context { struct 
translate *translate; bool primitive_restart; + + bool need_vertex_id; + int32_t index_bias; + uint32_t prim; uint32_t restart_index; uint32_t instance_id; @@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { + uint32_t elts = 0; + while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); unsigned size = ctx->vertex_words * push; + if (unlikely(ctx->need_vertex_id)) { + /* For non-indexed draws, gl_VertexID goes up after each vertex. 
*/ + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, elts++); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, @@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.push = nv50->base.pushbuf; ctx.translate = nv50->vertex->translate; - ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + + ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS && + nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32); + ctx.index_bias = info->index_bias; + + /* For indexed draws, gl_VertexID must be emitted for every vertex. */ + ctx.packet_vertex_limit = + ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit; ctx.vertex_words = nv50->vertex->vertex_size; assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); @@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.instance_id++; ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } + + if (unlikely(ctx.need_vertex_id)) { + /* Reset gl_VertexID to prevent future indexed draws to be confused. 
*/ + BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx.push, nv50->state.index_bias); + } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index dd9b85b7208..4cd3b615606 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" static struct pipe_query * nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) @@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50) pipe->end_query = nv50_end_query; pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; + nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS; +} + +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int num_hw_queries = 0; + + num_hw_queries = nv50_hw_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_queries; + + /* Init default values. 
*/ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->group_id = -1; + info->flags = 0; + + return nv50_hw_get_driver_query_info(screen, id, info); +} + +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += 2; + + if (!info) + return count; + + if (id == NV50_HW_SM_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "MP counters"; + + /* Because we can't expose the number of hardware counters needed + * for each different query, we don't want to allow more than one + * active query simultaneously to avoid failure when the maximum + * number of counters is reached. Note that these groups of GPU + * counters are currently only used by AMD_performance_monitor. 
+ */ + info->max_active_queries = 1; + info->num_queries = NV50_HW_SM_QUERY_COUNT; + return 1; + } + } + } else + if (id == NV50_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "Performance metrics"; + info->max_active_queries = 1; + info->num_queries = NV50_HW_METRIC_QUERY_COUNT; + return 1; + } + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + return 0; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h index d990285c857..bd4c0a386f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe) return (struct nv50_query *)pipe; } +/* + * Driver queries groups: + */ +#define NV50_HW_SM_QUERY_GROUP 0 +#define NV50_HW_METRIC_QUERY_GROUP 1 + void nv50_init_query_functions(struct nv50_context *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index 945ce7abe50..b6ebbbf1010 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -25,6 +25,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" #define NV50_HW_QUERY_STATE_READY 0 @@ -41,7 +43,7 @@ #define NV50_HW_QUERY_ALLOC_SPACE 256 -static bool +bool nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->begin_query) + return hq->funcs->begin_query(nv50, hq); + /* 
For occlusion queries we have to change the storage, because a previous * query might set the initial render condition to false even *after* we re- * initialized it to true. @@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->end_query) { + hq->funcs->end_query(nv50, hq); + return; + } + hq->state = NV50_HW_QUERY_STATE_ENDED; switch (q->type) { @@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, uint64_t *data64 = (uint64_t *)hq->data; int i; + if (hq->funcs && hq->funcs->get_query_result) + return hq->funcs->get_query_result(nv50, hq, wait, result); + if (hq->state != NV50_HW_QUERY_STATE_READY) nv50_hw_query_update(q); @@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) struct nv50_hw_query *hq; struct nv50_query *q; + hq = nv50_hw_sm_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + + hq = nv50_hw_metric_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) return NULL; @@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) return q; } +int +nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; + + num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nv50_hw_metric_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nv50_hw_sm_get_driver_query_info(screen, id, info); + + return nv50_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); 
+} + void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, struct nv50_query *q, unsigned result_offset) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index 294c67de9a4..82ec6bd2d96 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -8,8 +8,19 @@ #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) +struct nv50_hw_query; + +struct nv50_hw_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *); + void (*end_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *, + boolean, union pipe_query_result *); +}; + struct nv50_hw_query { struct nv50_query base; + const struct nv50_hw_query_funcs *funcs; uint32_t *data; uint32_t sequence; struct nouveau_bo *bo; @@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q) struct nv50_query * nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +int +nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +bool +nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int); void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, struct nv50_query *, unsigned); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c new file mode 100644 index 00000000000..d1bccb94193 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c @@ -0,0 +1,207 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NV84+ === */ +static const char *nv50_hw_metric_names[] = +{ + "metric-branch_efficiency", +}; + +struct nv50_hw_metric_query_cfg { + uint32_t queries[4]; + uint32_t num_queries; +}; + +#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n) +#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_metric_query_cfg +sm11_branch_efficiency = +{ + .queries[0] = _SM(BRANCH), + .queries[1] = _SM(DIVERGENT_BRANCH), + .num_queries = 2, +}; + +static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] = +{ + _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency), +}; + +#undef _SM +#undef _M + +static const struct nv50_hw_metric_query_cfg * +nv50_hw_metric_query_get_cfg(struct nv50_context *nv50, + struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return sm11_hw_metric_queries[q->type - NV50_HW_METRIC_QUERY(0)]; +} + +static void +nv50_hw_metric_destroy_query(struct nv50_context *nv50, + 
struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]); + FREE(hmq); +} + +static boolean +nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->begin_query(nv50, hmq->queries[i]); + if (!ret) + return ret; + } + return ret; +} + +static void +nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->end_query(nv50, hmq->queries[i]); +} + +static uint64_t +sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[8]) +{ + switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) { + case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY: + /* (branch / (branch + divergent_branch)) * 100 */ + if (res64[0] + res64[1]) + return (res64[0] / (double)(res64[0] + res64[1])) * 100; + break; + default: + debug_printf("invalid metric type: %d\n", + hq->base.type - NV50_HW_METRIC_QUERY(0)); + break; + } + return 0; +} + +static boolean +nv50_hw_metric_get_query_result(struct nv50_context *nv50, + struct nv50_hw_query *hq, boolean wait, + union pipe_query_result *result) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + union pipe_query_result results[4] = {}; + uint64_t res64[4] = {}; + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->get_query_result(nv50, hmq->queries[i], + wait, &results[i]); + if (!ret) + return ret; + res64[i] = *(uint64_t *)&results[i]; + } + + *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64); + return ret; +} + +static const struct 
nv50_hw_query_funcs hw_metric_query_funcs = { + .destroy_query = nv50_hw_metric_destroy_query, + .begin_query = nv50_hw_metric_begin_query, + .end_query = nv50_hw_metric_end_query, + .get_query_result = nv50_hw_metric_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type) +{ + const struct nv50_hw_metric_query_cfg *cfg; + struct nv50_hw_metric_query *hmq; + struct nv50_hw_query *hq; + unsigned i; + + if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST) + return NULL; + + hmq = CALLOC_STRUCT(nv50_hw_metric_query); + if (!hmq) + return NULL; + + hq = &hmq->base; + hq->funcs = &hw_metric_query_funcs; + hq->base.type = type; + + cfg = nv50_hw_metric_query_get_cfg(nv50, hq); + + for (i = 0; i < cfg->num_queries; i++) { + hmq->queries[i] = nv50_hw_sm_create_query(nv50, cfg->queries[i]); + if (!hmq->queries[i]) { + nv50_hw_metric_destroy_query(nv50, hq); + return NULL; + } + hmq->num_queries++; + } + + return hq; +} + +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += NV50_HW_METRIC_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_metric_names[id]; + info->query_type = NV50_HW_METRIC_QUERY(id); + info->group_id = NV50_HW_METRIC_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h new file mode 100644 index 00000000000..f8cfc04084f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h @@ -0,0 +1,34 @@ +#ifndef __NV50_QUERY_HW_METRIC_H__ +#define __NV50_QUERY_HW_METRIC_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_metric_query { + struct nv50_hw_query base; + 
struct nv50_hw_query *queries[4]; + unsigned num_queries; +}; + +static inline struct nv50_hw_metric_query * +nv50_hw_metric_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_metric_query *)hq; +} + +/* + * Driver metrics queries: + */ +#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) +#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1) +enum nv50_hw_metric_queries +{ + NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0, + NV50_HW_METRIC_QUERY_COUNT +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *, unsigned); +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c new file mode 100644 index 00000000000..8453ce76095 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -0,0 +1,417 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_sm.h" + +#include "nv_object.xml.h" +#include "nv50/nv50_compute.xml.h" + +/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */ + +/* NOTE: intentionally using the same names as NV */ +static const char *nv50_hw_sm_query_names[] = +{ + "branch", + "divergent_branch", + "instructions", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "sm_cta_launched", + "warp_serialize", +}; + +static const uint64_t nv50_read_hw_sm_counters_code[] = +{ + /* and b32 $r0 $r0 0x0000ffff + * add b32 $c0 $r0 $r0 $r0 + * (lg $c0) ret + * mov $r0 $pm0 + * mov $r1 $pm1 + * mov $r2 $pm2 + * mov $r3 $pm3 + * mov $r4 $physid + * ld $r5 b32 s[0x10] + * ld $r6 b32 s[0x14] + * and b32 $r4 $r4 0x000f0000 + * shr u32 $r4 $r4 0x10 + * mul $r4 u24 $r4 0x14 + * add b32 $r5 $r5 $r4 + * st b32 g15[$r5] $r0 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r1 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r2 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r3 + * add b32 $r5 $r5 0x04 + * exit st b32 g15[$r5] $r6 */ + 0x00000fffd03f0001ULL, + 0x040007c020000001ULL, + 0x0000028030000003ULL, + 0x6001078000000001ULL, + 0x6001478000000005ULL, + 0x6001878000000009ULL, + 0x6001c7800000000dULL, + 0x6000078000000011ULL, + 0x4400c78010000815ULL, + 0x4400c78010000a19ULL, + 0x0000f003d0000811ULL, + 0xe410078030100811ULL, + 0x0000000340540811ULL, + 0x0401078020000a15ULL, + 0xa0c00780d00f0a01ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a05ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a09ULL, + 0x0000000320048a15ULL, + 
0xa0c00780d00f0a0dULL, + 0x0000000320048a15ULL, + 0xa0c00781d00f0a19ULL, +}; + +struct nv50_hw_sm_counter_cfg +{ + uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */ + uint32_t unit : 8; /* UNK[0-5] */ + uint32_t sig : 8; /* signal selection */ +}; + +struct nv50_hw_sm_query_cfg +{ + struct nv50_hw_sm_counter_cfg ctr[4]; + uint8_t num_counters; +}; + +#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 } + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] = +{ + _Q(BRANCH, LOGOP, UNK4, 0x02), + _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09), + _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04), + _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26), + _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27), + _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28), + _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29), + _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a), + _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b), + _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c), + _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d), + _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33), + _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b), +}; + +static inline uint16_t nv50_hw_sm_get_func(uint8_t slot) +{ + switch (slot) { + case 0: return 0xaaaa; + case 1: return 0xcccc; + case 2: return 0xf0f0; + case 3: return 0xff00; + } + return 0; +} + +static const struct nv50_hw_sm_query_cfg * +nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)]; +} + +static void +nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + q->funcs->destroy_query(nv50, q); +} + +static boolean +nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + 
const struct nv50_hw_sm_query_cfg *cfg; + uint16_t func; + int i, c; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + /* check if we have enough free counter slots */ + if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return false; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 4); + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->MPsInTP; ++i) { + const unsigned b = (0x14 / 4) * i; + hq->data[b + 16] = 0; + } + hq->sequence++; + + for (i = 0; i < cfg->num_counters; i++) { + screen->pm.num_hw_sm_active++; + + /* find free counter slots */ + for (c = 0; c < 4; ++c) { + if (!screen->pm.mp_counter[c]) { + hsq->ctr[i] = c; + screen->pm.mp_counter[c] = hsq; + break; + } + } + + /* select func to aggregate counters */ + func = nv50_hw_sm_get_func(c); + + /* configure and reset the counter(s) */ + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } + return true; +} + +static void +nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct pipe_context *pipe = &nv50->base.pipe; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, 1, 1 }; + const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; + int c; + + if (unlikely(!screen->pm.prog)) { + struct nv50_program *prog = CALLOC_STRUCT(nv50_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = true; + prog->max_gpr = 7; + prog->parm_size = 8; + prog->code = (uint32_t *)nv50_read_hw_sm_counters_code; + prog->code_size = sizeof(nv50_read_hw_sm_counters_code); + screen->pm.prog = prog; + } + + /* disable all 
counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c]) { + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, 0); + } + } + + /* release counters for this query */ + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c] == hsq) { + screen->pm.num_hw_sm_active--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + hq->bo); + + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = hq->bo->offset + hq->base_offset; + input[1] = hq->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); + + /* re-active other counters */ + PUSH_SPACE(push, 8); + mask = 0; + for (c = 0; c < 4; c++) { + const struct nv50_hw_sm_query_cfg *cfg; + unsigned i; + + hsq = screen->pm.mp_counter[c]; + if (!hsq) + continue; + + cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base); + for (i = 0; i < cfg->num_counters; i++) { + uint16_t func; + + if (mask & (1 << hsq->ctr[i])) + break; + + mask |= 1 << hsq->ctr[i]; + func = nv50_hw_sm_get_func(hsq->ctr[i]); + + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + } + } +} + +static inline bool +nv50_hw_sm_query_read_data(uint32_t count[32][4], + struct nv50_context *nv50, bool wait, + struct nv50_hw_query *hq, + const struct nv50_hw_sm_query_cfg *cfg, + unsigned mp_count) +{ + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x14 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (hq->data[b + 4] != hq->sequence) { + if (!wait) + return false; + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)) + return false; + } + count[p][c] = 
hq->data[b + hsq->ctr[c]]; + } + } + return true; +} + +static boolean +nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq, + boolean wait, union pipe_query_result *result) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32); + unsigned p, c; + const struct nv50_hw_sm_query_cfg *cfg; + bool ret; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count); + if (!ret) + return false; + + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + + /* We only count a single TP, and simply multiply by the total number of + * TPs to compute result over all TPs. This is inaccurate, but enough! */ + value *= nv50->screen->TPs; + + *(uint64_t *)result = value; + return true; +} + +static const struct nv50_hw_query_funcs hw_sm_query_funcs = { + .destroy_query = nv50_hw_sm_destroy_query, + .begin_query = nv50_hw_sm_begin_query, + .end_query = nv50_hw_sm_end_query, + .get_query_result = nv50_hw_sm_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type) +{ + struct nv50_hw_sm_query *hsq; + struct nv50_hw_query *hq; + unsigned space; + + if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST) + return NULL; + + hsq = CALLOC_STRUCT(nv50_hw_sm_query); + if (!hsq) + return NULL; + + hq = &hsq->base; + hq->funcs = &hw_sm_query_funcs; + hq->base.type = type; + + /* + * for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.sequence + */ + space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t); + + if (!nv50_hw_query_allocate(nv50, &hq->base, space)) { + FREE(hq); + return NULL; + } + + return hq; +} + +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= 
NV84_3D_CLASS) + count += NV50_HW_SM_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_sm_query_names[id]; + info->query_type = NV50_HW_SM_QUERY(id); + info->group_id = NV50_HW_SM_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h new file mode 100644 index 00000000000..c1a1cd175e3 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h @@ -0,0 +1,45 @@ +#ifndef __NV50_QUERY_HW_SM_H__ +#define __NV50_QUERY_HW_SM_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_sm_query { + struct nv50_hw_query base; + uint8_t ctr[4]; +}; + +static inline struct nv50_hw_sm_query * +nv50_hw_sm_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_sm_query *)hq; +} + +/* + * Performance counter queries: + */ +#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1) +enum nv50_hw_sm_queries +{ + NV50_HW_SM_QUERY_BRANCH = 0, + NV50_HW_SM_QUERY_DIVERGENT_BRANCH, + NV50_HW_SM_QUERY_INSTRUCTIONS, + NV50_HW_SM_QUERY_PROF_TRIGGER_0, + NV50_HW_SM_QUERY_PROF_TRIGGER_1, + NV50_HW_SM_QUERY_PROF_TRIGGER_2, + NV50_HW_SM_QUERY_PROF_TRIGGER_3, + NV50_HW_SM_QUERY_PROF_TRIGGER_4, + NV50_HW_SM_QUERY_PROF_TRIGGER_5, + NV50_HW_SM_QUERY_PROF_TRIGGER_6, + NV50_HW_SM_QUERY_PROF_TRIGGER_7, + NV50_HW_SM_QUERY_SM_CTA_LAUNCHED, + NV50_HW_SM_QUERY_WARP_SERIALIZE, + NV50_HW_SM_QUERY_COUNT, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *, unsigned); +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f47e998ab1e..1e4b75f18e0 100644 --- 
a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -41,8 +41,6 @@ #define THREADS_IN_WARP 32 -#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) - static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_COMPUTE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ @@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: break; default: return 0; @@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) return 0.0f; } +static int +nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t []) { 2 }); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + RET(((uint64_t []) { 65535, 65535 })); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + RET(((uint64_t []) { 512, 512, 64 })); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + RET((uint64_t []) { 512 }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */ + 
RET((uint64_t []) { 1ULL << 32 }); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + RET((uint64_t []) { 4096 }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ + default: + return 0; + } + +#undef RET +} + static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->tesla); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); nouveau_object_del(&screen->sync); nouveau_screen_fini(&screen->base); @@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } @@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_compute_param = nv50_screen_get_compute_param; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); @@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev) screen->TPs = util_bitcount(value & 
0xffff); screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + screen->mp_count = screen->TPs * screen->MPsInTP; + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * STACK_WARPS_ALLOC * 64 * 8; @@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev) nv50_screen_init_hwctx(screen); + ret = nv50_screen_compute_setup(screen, screen->base.pushbuf); + if (ret) { + NOUVEAU_ERR("Failed to init compute context: %d\n", ret); + goto fail; + } + nouveau_fence_new(&screen->base, &screen->base.fence.current, false); return pscreen; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index ce51f0fc254..2a4983d1020 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -23,6 +23,10 @@ struct nv50_context; #define NV50_MAX_VIEWPORTS 16 +#define NV50_MAX_GLOBALS 16 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + struct nv50_blitter; struct nv50_graph_state { @@ -66,6 +70,7 @@ struct nv50_screen { unsigned MPsInTP; unsigned max_tls_space; unsigned cur_tls_space; + unsigned mp_count; struct nouveau_heap *vp_code_heap; struct nouveau_heap *gp_code_heap; @@ -90,9 +95,16 @@ struct nv50_screen { struct nouveau_bo *bo; } fence; + struct { + struct nv50_program *prog; /* compute state object to read MP counters */ + struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */ + uint8_t num_hw_sm_active; + } pm; + struct nouveau_object *sync; struct nouveau_object *tesla; + struct nouveau_object *compute; struct nouveau_object *eng2d; struct nouveau_object *m2mf; }; @@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_group_info *); + bool nv50_blitter_create(struct 
nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); int nv50_screen_tsc_alloc(struct nv50_screen *, void *); +int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *); + static inline void nv50_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index d27f12ca94b..b4ea08d4d13 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_GMTYPROG; } +static void * +nv50_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->compprog = hwcso; + nv50->dirty_cp |= NV50_NEW_CP_PROGRAM; +} + static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) @@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, nv50->dirty |= NV50_NEW_STRMOUT; } +static void +nv50_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + /* TODO: bind surfaces */ +} + +static inline void +nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + 
buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nv50_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nv50->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nv50->global_residents, req_size); + memset((uint8_t *)nv50->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { + pipe_resource_reference(&ptr[i], resources[i]); + nv50_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL); + + nv50->dirty_cp = NV50_NEW_CP_GLOBALS; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->create_vs_state = nv50_vp_state_create; pipe->create_fs_state = nv50_fp_state_create; pipe->create_gs_state = nv50_gp_state_create; + pipe->create_compute_state = nv50_cp_state_create; pipe->bind_vs_state = nv50_vp_state_bind; pipe->bind_fs_state = nv50_fp_state_bind; pipe->bind_gs_state = nv50_gp_state_bind; + pipe->bind_compute_state = nv50_cp_state_bind; pipe->delete_vs_state = nv50_sp_state_delete; 
pipe->delete_fs_state = nv50_sp_state_delete; pipe->delete_gs_state = nv50_sp_state_delete; + pipe->delete_compute_state = nv50_sp_state_delete; pipe->set_blend_color = nv50_set_blend_color; pipe->set_stencil_ref = nv50_set_stencil_ref; @@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->stream_output_target_destroy = nv50_so_target_destroy; pipe->set_stream_output_targets = nv50_set_stream_output_targets; + pipe->set_global_binding = nv50_set_global_bindings; + pipe->set_compute_resources = nv50_set_compute_resources; + nv50->sample_mask = ~0; nv50->min_samples = 1; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b6181edf24f..02a759c23ad 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,8 +503,7 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | - NV50_NEW_VERTPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 916a7d44a31..8ba19d2cc90 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << 
NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, (width << 16)); PUSH_DATA (push, (height << 16)); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); @@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, 0x3c); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 9aa593f919e..85878d5fcc7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) uint64_t addrs[PIPE_MAX_ATTRIBS]; uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_vertex_stateobj dummy = {}; - struct nv50_vertex_stateobj *vertex = nv50->vertex ? 
nv50->vertex : &dummy; + struct nv50_vertex_stateobj *vertex = nv50->vertex; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; uint32_t mask; @@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); - /* A vertexid is not generated for inline data uploads. Have to use a - * VBO. This check must come after the vertprog has been validated, - * otherwise vertexid may be unset. - */ - assert(nv50->vertprog->translated); - if (nv50->vertprog->vp.vertexid) - nv50->vbo_push_hint = 0; - if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else @@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } nv50->state.index_bias = 0; @@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, index_bias); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, index_bias); } nv50->state.index_bias = index_bias; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index 76f1b41ea70..68002305d72 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_3D(m) 3, (m) #define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NV84_3D(n) SUBC_3D(NV84_3D_##n) #define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) #define SUBC_2D(m) 4, (m) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 82ed5a1864e..162661ff2a7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && nvc0->framebuffer.cbufs[i]->texture == res) { @@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_COMMAND_ARGS_BUFFER | + PIPE_BIND_SAMPLER_VIEW)) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { nvc0->dirty |= NVC0_NEW_ARRAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index f53921092a5..d992b10a23c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, info->max_value.u64 = 0; info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; info->group_id = -1; + info->flags = 0; #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS if (id < 
num_sw_queries) @@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (id == NVC0_HW_SM_QUERY_GROUP) { if (screen->compute) { info->name = "MP counters"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; /* Because we can't expose the number of hardware counters needed for * each different query, we don't want to allow more than one active @@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (screen->compute) { if (screen->base.class_3d < NVE4_3D_CLASS) { info->name = "Performance metrics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; info->max_active_queries = 1; info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; return 1; @@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { info->name = "Driver statistics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU; info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; return 1; @@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->name = "this_is_not_the_query_group_you_are_looking_for"; info->max_active_queries = 0; info->num_queries = 0; - info->type = 0; return 0; } @@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; pipe->render_condition = nvc0_render_condition; + nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 44b222e5134..7962143d45a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nvc0_program *prog = 
CALLOC_STRUCT(nvc0_program); prog->type = PIPE_SHADER_COMPUTE; prog->translated = true; - prog->num_gprs = 14; prog->parm_size = 12; if (is_nve4) { prog->code = (uint32_t *)nve4_read_hw_sm_counters_code; prog->code_size = sizeof(nve4_read_hw_sm_counters_code); + prog->num_gprs = 14; } else { prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code; prog->code_size = sizeof(nvc0_read_hw_sm_counters_code); + prog->num_gprs = 12; } screen->pm.prog = prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index cdb1fc1145f..6a4ae5be2ab 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, nvc0_resource_fence(res, NOUVEAU_BO_WR); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); if (width * height != elements) { @@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, dst->u.tex.first_layer); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); + 
IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index f63790c329e..1dbad2f39e3 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -7,12 +7,14 @@ C_SOURCES := \ r600_pipe_common.c \ r600_pipe_common.h \ r600_query.c \ + r600_query.h \ r600_streamout.c \ r600_texture.c \ radeon_uvd.c \ radeon_uvd.h \ radeon_vce_40_2_2.c \ radeon_vce_50.c \ + radeon_vce_52.c \ radeon_vce.c \ radeon_vce.h \ radeon_video.c \ diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3599692a857..7464f677398 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -27,6 +27,7 @@ #include "r600_pipe_common.h" #include "r600_cs.h" #include "tgsi/tgsi_parse.h" +#include "util/list.h" #include "util/u_draw_quad.h" #include "util/u_memory.h" #include "util/u_format_s3tc.h" @@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) void r600_preflush_suspend_features(struct r600_common_context *ctx) { /* suspend queries */ - ctx->queries_suspended_for_flush = false; - if (ctx->num_cs_dw_nontimer_queries_suspend) { + if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries)) r600_suspend_nontimer_queries(ctx); + if (!LIST_IS_EMPTY(&ctx->active_timer_queries)) r600_suspend_timer_queries(ctx); - ctx->queries_suspended_for_flush = true; - } ctx->streamout.suspended = false; if (ctx->streamout.begin_emitted) { @@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx) } /* resume queries */ - if 
(ctx->queries_suspended_for_flush) { - r600_resume_nontimer_queries(ctx); + if (!LIST_IS_EMPTY(&ctx->active_timer_queries)) r600_resume_timer_queries(ctx); - } + if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries)) + r600_resume_nontimer_queries(ctx); } static void r600_flush_from_st(struct pipe_context *ctx, @@ -718,50 +717,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen) rscreen->info.r600_clock_crystal_freq; } -static int r600_get_driver_query_info(struct pipe_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct pipe_driver_query_info list[] = { - {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"draw-calls", R600_QUERY_DRAW_CALLS, {0}}, - {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}}, - {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES, - PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE}, - {"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, - {"GPU-load", R600_QUERY_GPU_LOAD, {100}}, - {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}}, - {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ}, - {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ}, - }; - unsigned 
num_queries; - - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) - num_queries = Elements(list); - else if (rscreen->info.drm_major == 3) - num_queries = Elements(list) - 3; - else - num_queries = Elements(list) - 4; - - if (!info) - return num_queries; - - if (index >= num_queries) - return 0; - - *info = list[index]; - return 1; -} - static void r600_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **dst, struct pipe_fence_handle *src) @@ -949,7 +904,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.get_device_vendor = r600_get_device_vendor; rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; - rscreen->b.get_driver_query_info = r600_get_driver_query_info; rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; @@ -965,6 +919,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, } r600_init_screen_texture_functions(rscreen); + r600_init_screen_query_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index ebe633b9125..fbdc5c410ae 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -47,21 +47,6 @@ #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) -#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) -#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1) -#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2) -#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3) -#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4) -#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) -#define 
R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) -#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) -#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) -#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) -#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) -#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) -#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12) -#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13) - #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) #define R600_CONTEXT_PRIVATE_FLAG (1u << 1) @@ -408,8 +393,6 @@ struct r600_common_context { struct list_head active_timer_queries; unsigned num_cs_dw_nontimer_queries_suspend; unsigned num_cs_dw_timer_queries_suspend; - /* If queries have been suspended. */ - bool queries_suspended_for_flush; /* Additional hardware info. */ unsigned backend_mask; unsigned max_db; /* for OQ */ @@ -526,6 +509,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen); unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin); /* r600_query.c */ +void r600_init_screen_query_functions(struct r600_common_screen *rscreen); void r600_query_init(struct r600_common_context *rctx); void r600_suspend_nontimer_queries(struct r600_common_context *ctx); void r600_resume_nontimer_queries(struct r600_common_context *ctx); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 8c2b601a96c..b1cfb6e462b 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -22,81 +22,218 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "r600_query.h" #include "r600_cs.h" #include "util/u_memory.h" +/* Queries without buffer handling or suspend/resume. */ +struct r600_query_sw { + struct r600_query b; -struct r600_query_buffer { - /* The buffer where query results are stored. 
*/ - struct r600_resource *buf; - /* Offset of the next free result after current query data */ - unsigned results_end; - /* If a query buffer is full, a new buffer is created and the old one - * is put in here. When we calculate the result, we sum up the samples - * from all buffers. */ - struct r600_query_buffer *previous; -}; - -struct r600_query { - /* The query buffer and how many results are in it. */ - struct r600_query_buffer buffer; - /* The type of query */ - unsigned type; - /* Size of the result in memory for both begin_query and end_query, - * this can be one or two numbers, or it could even be a size of a structure. */ - unsigned result_size; - /* The number of dwords for begin_query or end_query. */ - unsigned num_cs_dw; - /* linked list of queries */ - struct list_head list; - /* for custom non-GPU queries */ uint64_t begin_result; uint64_t end_result; /* Fence for GPU_FINISHED. */ struct pipe_fence_handle *fence; - /* For transform feedback: which stream the query is for */ - unsigned stream; }; - -static bool r600_is_timer_query(unsigned type) +static void r600_query_sw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery) { - return type == PIPE_QUERY_TIME_ELAPSED || - type == PIPE_QUERY_TIMESTAMP; + struct pipe_screen *screen = rctx->b.screen; + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + screen->fence_reference(screen, &query->fence, NULL); + FREE(query); } -static bool r600_query_needs_begin(unsigned type) +static enum radeon_value_id winsys_id_from_type(unsigned type) { - return type != PIPE_QUERY_GPU_FINISHED && - type != PIPE_QUERY_TIMESTAMP; + switch (type) { + case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY; + case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY; + case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; + case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES; + case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; + case 
R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; + case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; + case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; + case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; + case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; + default: unreachable("query type does not correspond to winsys id"); + } } -static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type) +static boolean r600_query_sw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) { - unsigned j, i, num_results, buf_size = 4096; - uint32_t *results; + struct r600_query_sw *query = (struct r600_query_sw *)rquery; - /* Non-GPU queries. */ - switch (type) { + switch(query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_GPU_FINISHED: + break; case R600_QUERY_DRAW_CALLS: + query->begin_result = rctx->num_draw_calls; + break; case R600_QUERY_REQUESTED_VRAM: case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_VRAM_USAGE: + case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + query->begin_result = 0; + break; case R600_QUERY_BUFFER_WAIT_TIME: case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: + case R600_QUERY_NUM_BYTES_MOVED: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } + case R600_QUERY_GPU_LOAD: + query->begin_result = r600_gpu_load_begin(rctx->screen); + break; + case R600_QUERY_NUM_COMPILATIONS: + query->begin_result = p_atomic_read(&rctx->screen->num_compilations); + break; + case R600_QUERY_NUM_SHADERS_CREATED: + query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + default: + unreachable("r600_query_sw_begin: bad query type"); + } + + return TRUE; +} + +static void r600_query_sw_end(struct r600_common_context *rctx, + struct r600_query 
*rquery) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + switch(query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; + case PIPE_QUERY_GPU_FINISHED: + rctx->b.flush(&rctx->b, &query->fence, 0); + break; + case R600_QUERY_DRAW_CALLS: + query->begin_result = rctx->num_draw_calls; + break; + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_REQUESTED_GTT: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_NUM_CS_FLUSHES: + case R600_QUERY_NUM_BYTES_MOVED: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } case R600_QUERY_GPU_LOAD: + query->end_result = r600_gpu_load_end(rctx->screen, + query->begin_result); + query->begin_result = 0; + break; case R600_QUERY_NUM_COMPILATIONS: + query->begin_result = p_atomic_read(&rctx->screen->num_compilations); + break; case R600_QUERY_NUM_SHADERS_CREATED: + query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + default: + unreachable("r600_query_sw_end: bad query type"); + } +} + +static boolean r600_query_sw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, + union pipe_query_result *result) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + switch (query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* Convert from cycles per millisecond to cycles per second (Hz). */ + result->timestamp_disjoint.frequency = + (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000; + result->timestamp_disjoint.disjoint = FALSE; + return TRUE; + case PIPE_QUERY_GPU_FINISHED: { + struct pipe_screen *screen = rctx->b.screen; + result->b = screen->fence_finish(screen, query->fence, + wait ? 
PIPE_TIMEOUT_INFINITE : 0); + return result->b; + } + } + + result->u64 = query->end_result - query->begin_result; + + switch (query->b.type) { + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_GPU_TEMPERATURE: + result->u64 /= 1000; + break; + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + result->u64 *= 1000000; + break; + } + + return TRUE; +} + +static struct r600_query_ops sw_query_ops = { + .destroy = r600_query_sw_destroy, + .begin = r600_query_sw_begin, + .end = r600_query_sw_end, + .get_result = r600_query_sw_get_result +}; + +static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx, + unsigned query_type) +{ + struct r600_query_sw *query; + + query = CALLOC_STRUCT(r600_query_sw); + if (query == NULL) return NULL; + + query->b.type = query_type; + query->b.ops = &sw_query_ops; + + return (struct pipe_query *)query; +} + +void r600_query_hw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + struct r600_query_buffer *prev = query->buffer.previous; + + /* Release all query buffers. */ + while (prev) { + struct r600_query_buffer *qbuf = prev; + prev = prev->previous; + pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); + FREE(qbuf); } + pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL); + FREE(rquery); +} + +static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + unsigned buf_size = 4096; + /* Queries are normally read by the CPU after * being written by the gpu, hence staging is probably a good * usage pattern. 
@@ -105,14 +242,30 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buf_size); - switch (type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); - memset(results, 0, buf_size); + if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) + query->ops->prepare_buffer(ctx, query, buf); + + return buf; +} + +static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer) +{ + /* Callers ensure that the buffer is currently unused by the GPU. */ + uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED); + + memset(results, 0, buffer->b.b.width0); + + if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || + query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) { + unsigned num_results; + unsigned i, j; /* Set top bits for unused backends. 
*/ - num_results = buf_size / (16 * ctx->max_db); + num_results = buffer->b.b.width0 / (16 * ctx->max_db); for (j = 0; j < num_results; j++) { for (i = 0; i < ctx->max_db; i++) { if (!(ctx->backend_mask & (1<<i))) { @@ -122,22 +275,109 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c } results += 4 * ctx->max_db; } + } +} + +static struct r600_query_ops query_hw_ops = { + .destroy = r600_query_hw_destroy, + .begin = r600_query_hw_begin, + .end = r600_query_hw_end, + .get_result = r600_query_hw_get_result, +}; + +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_add_result(struct r600_common_context *ctx, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +static void r600_query_hw_clear_result(struct r600_query_hw *, + union pipe_query_result *); + +static struct r600_query_hw_ops query_hw_default_hw_ops = { + .prepare_buffer = r600_query_hw_prepare_buffer, + .emit_start = r600_query_hw_do_emit_start, + .emit_stop = r600_query_hw_do_emit_stop, + .clear_result = r600_query_hw_clear_result, + .add_result = r600_query_hw_add_result, +}; + +boolean r600_query_hw_init(struct r600_common_context *rctx, + struct r600_query_hw *query) +{ + query->buffer.buf = r600_new_query_buffer(rctx, query); + if (!query->buffer.buf) + return FALSE; + + return TRUE; +} + +static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx, + unsigned query_type, + unsigned index) +{ + struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw); + if (!query) + return NULL; + + query->b.type = query_type; + query->b.ops = &query_hw_ops; + query->ops = &query_hw_default_hw_ops; + + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + 
case PIPE_QUERY_OCCLUSION_PREDICATE: + query->result_size = 16 * rctx->max_db; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; + query->flags |= R600_QUERY_HW_FLAG_PREDICATE; break; case PIPE_QUERY_TIME_ELAPSED: + query->result_size = 16; + query->num_cs_dw_begin = 8; + query->num_cs_dw_end = 8; + query->flags = R600_QUERY_HW_FLAG_TIMER; + break; case PIPE_QUERY_TIMESTAMP: + query->result_size = 8; + query->num_cs_dw_end = 8; + query->flags = R600_QUERY_HW_FLAG_TIMER | + R600_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ + query->result_size = 32; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; + query->stream = index; + query->flags |= R600_QUERY_HW_FLAG_PREDICATE; + break; case PIPE_QUERY_PIPELINE_STATISTICS: - results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); - memset(results, 0, buf_size); + /* 11 values on EG, 8 on R600. */ + query->result_size = (rctx->chip_class >= EVERGREEN ? 
11 : 8) * 16; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; break; default: assert(0); + FREE(query); + return NULL; } - return buf; + + if (!r600_query_hw_init(rctx, query)) { + FREE(query); + return NULL; + } + + return (struct pipe_query *)query; } static void r600_update_occlusion_query_state(struct r600_common_context *rctx, @@ -159,7 +399,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx, } } -static unsigned event_type_for_stream(struct r600_query *query) +static unsigned event_type_for_stream(struct r600_query_hw *query) { switch (query->stream) { default: @@ -170,28 +410,14 @@ static unsigned event_type_for_stream(struct r600_query *query) } } -static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query) +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) { struct radeon_winsys_cs *cs = ctx->gfx.cs; - uint64_t va; - - r600_update_occlusion_query_state(ctx, query->type, 1); - r600_update_prims_generated_query_state(ctx, query->type, 1); - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE); - - /* Get a new query buffer if needed. 
*/ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); - *qbuf = query->buffer; - query->buffer.buf = r600_new_query_buffer(ctx, query->type); - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - } - - /* emit begin query */ - va = query->buffer.buf->gpu_address + query->buffer.results_end; - switch (query->type) { + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); @@ -227,30 +453,50 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q } r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - - if (r600_is_timer_query(query->type)) - ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw; - else - ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; } -static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query) +static void r600_query_hw_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query) { - struct radeon_winsys_cs *cs = ctx->gfx.cs; uint64_t va; - /* The queries which need begin already called this in begin_query. */ - if (!r600_query_needs_begin(query->type)) { - ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE); + r600_update_occlusion_query_state(ctx, query->b.type, 1); + r600_update_prims_generated_query_state(ctx, query->b.type, 1); + + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, + TRUE); + + /* Get a new query buffer if needed. 
*/ + if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { + struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); + *qbuf = query->buffer; + query->buffer.buf = r600_new_query_buffer(ctx, query); + query->buffer.results_end = 0; + query->buffer.previous = qbuf; } - va = query->buffer.buf->gpu_address; + /* emit begin query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_start(ctx, query, query->buffer.buf, va); - /* emit end query */ - switch (query->type) { + if (query->flags & R600_QUERY_HW_FLAG_TIMER) + ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end; + else + ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end; +} + +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - va += query->buffer.results_end + 8; + va += 8; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, va); @@ -260,14 +506,14 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_TIME_ELAPSED: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; /* fall through */ case PIPE_QUERY_TIMESTAMP: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); @@ -278,7 +524,7 @@ static void r600_emit_query_end(struct 
r600_common_context *ctx, struct r600_que radeon_emit(cs, 0); break; case PIPE_QUERY_PIPELINE_STATISTICS: - va += query->buffer.results_end + query->result_size/2; + va += query->result_size/2; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); @@ -289,25 +535,41 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que } r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); +} + +static void r600_query_hw_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + uint64_t va; + + /* The queries which need begin already called this in begin_query. */ + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE); + } + + /* emit end query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_stop(ctx, query, query->buffer.buf, va); query->buffer.results_end += query->result_size; - if (r600_query_needs_begin(query->type)) { - if (r600_is_timer_query(query->type)) - ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw; + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) { + if (query->flags & R600_QUERY_HW_FLAG_TIMER) + ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end; else - ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; + ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end; } - r600_update_occlusion_query_state(ctx, query->type, -1); - r600_update_prims_generated_query_state(ctx, query->type, -1); + r600_update_occlusion_query_state(ctx, query->b.type, -1); + r600_update_prims_generated_query_state(ctx, query->b.type, -1); } static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = ctx->gfx.cs; - struct r600_query *query = (struct r600_query*)ctx->render_cond; + struct r600_query_hw *query 
= (struct r600_query_hw *)ctx->render_cond; struct r600_query_buffer *qbuf; uint32_t op; bool flag_wait; @@ -318,7 +580,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; - switch (query->type) { + switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: op = PRED_OP(PREDICATION_OP_ZPASS); @@ -364,94 +626,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *query; - bool skip_allocation = false; - query = CALLOC_STRUCT(r600_query); - if (query == NULL) - return NULL; - - query->type = query_type; - - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - query->result_size = 16 * rctx->max_db; - query->num_cs_dw = 6; - break; - break; - case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 16; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_TIMESTAMP: - query->result_size = 8; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32; - query->num_cs_dw = 6; - query->stream = index; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - /* 11 values on EG, 8 on R600. */ - query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16; - query->num_cs_dw = 6; - break; - /* Non-GPU queries and queries not requiring a buffer. 
*/ - case PIPE_QUERY_TIMESTAMP_DISJOINT: - case PIPE_QUERY_GPU_FINISHED: - case R600_QUERY_DRAW_CALLS: - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_GPU_LOAD: - case R600_QUERY_NUM_COMPILATIONS: - case R600_QUERY_NUM_SHADERS_CREATED: - skip_allocation = true; - break; - default: - assert(0); - FREE(query); - return NULL; - } + if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || + query_type == PIPE_QUERY_GPU_FINISHED || + query_type >= PIPE_QUERY_DRIVER_SPECIFIC) + return r600_query_sw_create(ctx, query_type); - if (!skip_allocation) { - query->buffer.buf = r600_new_query_buffer(rctx, query_type); - if (!query->buffer.buf) { - FREE(query); - return NULL; - } - } - return (struct pipe_query*)query; + return r600_query_hw_create(rctx, query_type, index); } static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { - struct r600_query *rquery = (struct r600_query*)query; - struct r600_query_buffer *prev = rquery->buffer.previous; - - /* Release all query buffers. 
*/ - while (prev) { - struct r600_query_buffer *qbuf = prev; - prev = prev->previous; - pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); - FREE(qbuf); - } + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; - pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); - FREE(query); + rquery->ops->destroy(rctx, rquery); } static boolean r600_begin_query(struct pipe_context *ctx, @@ -459,48 +648,14 @@ static boolean r600_begin_query(struct pipe_context *ctx, { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - struct r600_query_buffer *prev = rquery->buffer.previous; - if (!r600_query_needs_begin(rquery->type)) { - assert(0); - return false; - } + return rquery->ops->begin(rctx, rquery); +} - /* Non-GPU queries. */ - switch (rquery->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - return true; - case R600_QUERY_DRAW_CALLS: - rquery->begin_result = rctx->num_draw_calls; - return true; - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - rquery->begin_result = 0; - return true; - case R600_QUERY_BUFFER_WAIT_TIME: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000; - return true; - case R600_QUERY_NUM_CS_FLUSHES: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES); - return true; - case R600_QUERY_NUM_BYTES_MOVED: - rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED); - return true; - case R600_QUERY_GPU_LOAD: - rquery->begin_result = r600_gpu_load_begin(rctx->screen); - return true; - case R600_QUERY_NUM_COMPILATIONS: - rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations); - return true; - case 
R600_QUERY_NUM_SHADERS_CREATED: - rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); - return true; - } +static void r600_query_hw_reset_buffers(struct r600_common_context *rctx, + struct r600_query_hw *query) +{ + struct r600_query_buffer *prev = query->buffer.previous; /* Discard the old query buffers. */ while (prev) { @@ -510,22 +665,39 @@ static boolean r600_begin_query(struct pipe_context *ctx, FREE(qbuf); } - /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || - !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); - rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type); + if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) { + /* Obtain a new buffer if the current one can't be mapped without a stall. */ + if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { + pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL); + query->buffer.buf = r600_new_query_buffer(rctx, query); + } else { + query->ops->prepare_buffer(rctx, query, query->buffer.buf); + } } - rquery->buffer.results_end = 0; - rquery->buffer.previous = NULL; + query->buffer.results_end = 0; + query->buffer.previous = NULL; +} - r600_emit_query_begin(rctx, rquery); +boolean r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; - if (r600_is_timer_query(rquery->type)) - LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries); + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + assert(0); + return false; + } + + r600_query_hw_reset_buffers(rctx, query); + + r600_query_hw_emit_start(rctx, query); + + if (query->flags 
& R600_QUERY_HW_FLAG_TIMER) + LIST_ADDTAIL(&query->list, &rctx->active_timer_queries); else - LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries); + LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries); return true; } @@ -534,64 +706,24 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; - /* Non-GPU queries. */ - switch (rquery->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - return; - case PIPE_QUERY_GPU_FINISHED: - ctx->flush(ctx, &rquery->fence, 0); - return; - case R600_QUERY_DRAW_CALLS: - rquery->end_result = rctx->num_draw_calls; - return; - case R600_QUERY_REQUESTED_VRAM: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY); - return; - case R600_QUERY_REQUESTED_GTT: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY); - return; - case R600_QUERY_BUFFER_WAIT_TIME: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000; - return; - case R600_QUERY_NUM_CS_FLUSHES: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES); - return; - case R600_QUERY_NUM_BYTES_MOVED: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED); - return; - case R600_QUERY_VRAM_USAGE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE); - return; - case R600_QUERY_GTT_USAGE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE); - return; - case R600_QUERY_GPU_TEMPERATURE: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000; - return; - case R600_QUERY_CURRENT_GPU_SCLK: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000; - return; - case R600_QUERY_CURRENT_GPU_MCLK: - rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000; - return; - case 
R600_QUERY_GPU_LOAD: - rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result); - return; - case R600_QUERY_NUM_COMPILATIONS: - rquery->end_result = p_atomic_read(&rctx->screen->num_compilations); - return; - case R600_QUERY_NUM_SHADERS_CREATED: - rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created); - return; - } + rquery->ops->end(rctx, rquery); +} - r600_emit_query_end(rctx, rquery); +void r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + + if (query->flags & R600_QUERY_HW_FLAG_NO_START) + r600_query_hw_reset_buffers(rctx, query); - if (r600_query_needs_begin(rquery->type)) - LIST_DELINIT(&rquery->list); + r600_query_hw_emit_stop(rctx, query); + + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) + LIST_DELINIT(&query->list); } -static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, +static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, bool test_status_bit) { uint32_t *current_result = (uint32_t*)map; @@ -609,80 +741,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned return 0; } -static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, - struct r600_query *query, - struct r600_query_buffer *qbuf, - boolean wait, - union pipe_query_result *result) +static void r600_query_hw_add_result(struct r600_common_context *ctx, + struct r600_query_hw *query, + void *buffer, + union pipe_query_result *result) { - struct pipe_screen *screen = ctx->b.screen; - unsigned results_base = 0; - char *map; - - /* Non-GPU queries. */ - switch (query->type) { - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* Convert from cycles per millisecond to cycles per second (Hz). 
*/ - result->timestamp_disjoint.frequency = - (uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000; - result->timestamp_disjoint.disjoint = FALSE; - return TRUE; - case PIPE_QUERY_GPU_FINISHED: - result->b = screen->fence_finish(screen, query->fence, - wait ? PIPE_TIMEOUT_INFINITE : 0); - return result->b; - case R600_QUERY_DRAW_CALLS: - case R600_QUERY_REQUESTED_VRAM: - case R600_QUERY_REQUESTED_GTT: - case R600_QUERY_BUFFER_WAIT_TIME: - case R600_QUERY_NUM_CS_FLUSHES: - case R600_QUERY_NUM_BYTES_MOVED: - case R600_QUERY_VRAM_USAGE: - case R600_QUERY_GTT_USAGE: - case R600_QUERY_GPU_TEMPERATURE: - case R600_QUERY_CURRENT_GPU_SCLK: - case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_NUM_COMPILATIONS: - case R600_QUERY_NUM_SHADERS_CREATED: - result->u64 = query->end_result - query->begin_result; - return TRUE; - case R600_QUERY_GPU_LOAD: - result->u64 = query->end_result; - return TRUE; - } - - map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf, - PIPE_TRANSFER_READ | - (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); - if (!map) - return FALSE; - - /* count all results across all data blocks */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - while (results_base != qbuf->results_end) { + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: { + unsigned results_base = 0; + while (results_base != query->result_size) { result->u64 += - r600_query_read_result(map + results_base, 0, 2, true); + r600_query_read_result(buffer + results_base, 0, 2, true); results_base += 16; } break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - while (results_base != qbuf->results_end) { + } + case PIPE_QUERY_OCCLUSION_PREDICATE: { + unsigned results_base = 0; + while (results_base != query->result_size) { result->b = result->b || - r600_query_read_result(map + results_base, 0, 2, true) != 0; + r600_query_read_result(buffer + results_base, 0, 2, true) != 0; results_base += 16; } break; + } case PIPE_QUERY_TIME_ELAPSED: - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 0, 2, false); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 0, 2, false); break; case PIPE_QUERY_TIMESTAMP: { - uint32_t *current_result = (uint32_t*)map; + uint32_t *current_result = (uint32_t*)buffer; result->u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32; break; @@ -694,84 +782,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, * u64 PrimitiveStorageNeeded; * } * We only need NumPrimitivesWritten here. */ - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 2, 6, true); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 2, 6, true); break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* Here we read PrimitiveStorageNeeded. 
*/ - while (results_base != qbuf->results_end) { - result->u64 += - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->u64 += r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_SO_STATISTICS: - while (results_base != qbuf->results_end) { - result->so_statistics.num_primitives_written += - r600_query_read_result(map + results_base, 2, 6, true); - result->so_statistics.primitives_storage_needed += - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->so_statistics.num_primitives_written += + r600_query_read_result(buffer, 2, 6, true); + result->so_statistics.primitives_storage_needed += + r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - while (results_base != qbuf->results_end) { - result->b = result->b || - r600_query_read_result(map + results_base, 2, 6, true) != - r600_query_read_result(map + results_base, 0, 4, true); - results_base += query->result_size; - } + result->b = result->b || + r600_query_read_result(buffer, 2, 6, true) != + r600_query_read_result(buffer, 0, 4, true); break; case PIPE_QUERY_PIPELINE_STATISTICS: if (ctx->chip_class >= EVERGREEN) { - while (results_base != qbuf->results_end) { - result->pipeline_statistics.ps_invocations += - r600_query_read_result(map + results_base, 0, 22, false); - result->pipeline_statistics.c_primitives += - r600_query_read_result(map + results_base, 2, 24, false); - result->pipeline_statistics.c_invocations += - r600_query_read_result(map + results_base, 4, 26, false); - result->pipeline_statistics.vs_invocations += - r600_query_read_result(map + results_base, 6, 28, false); - result->pipeline_statistics.gs_invocations += - r600_query_read_result(map + results_base, 8, 30, false); - result->pipeline_statistics.gs_primitives += - r600_query_read_result(map + results_base, 10, 32, false); - result->pipeline_statistics.ia_primitives += - 
r600_query_read_result(map + results_base, 12, 34, false); - result->pipeline_statistics.ia_vertices += - r600_query_read_result(map + results_base, 14, 36, false); - result->pipeline_statistics.hs_invocations += - r600_query_read_result(map + results_base, 16, 38, false); - result->pipeline_statistics.ds_invocations += - r600_query_read_result(map + results_base, 18, 40, false); - result->pipeline_statistics.cs_invocations += - r600_query_read_result(map + results_base, 20, 42, false); - results_base += query->result_size; - } + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 22, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 24, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 26, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 28, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 30, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 32, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 34, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 36, false); + result->pipeline_statistics.hs_invocations += + r600_query_read_result(buffer, 16, 38, false); + result->pipeline_statistics.ds_invocations += + r600_query_read_result(buffer, 18, 40, false); + result->pipeline_statistics.cs_invocations += + r600_query_read_result(buffer, 20, 42, false); } else { - while (results_base != qbuf->results_end) { - result->pipeline_statistics.ps_invocations += - r600_query_read_result(map + results_base, 0, 16, false); - result->pipeline_statistics.c_primitives += - r600_query_read_result(map + results_base, 2, 18, false); - result->pipeline_statistics.c_invocations += - r600_query_read_result(map + results_base, 4, 20, false); - 
result->pipeline_statistics.vs_invocations += - r600_query_read_result(map + results_base, 6, 22, false); - result->pipeline_statistics.gs_invocations += - r600_query_read_result(map + results_base, 8, 24, false); - result->pipeline_statistics.gs_primitives += - r600_query_read_result(map + results_base, 10, 26, false); - result->pipeline_statistics.ia_primitives += - r600_query_read_result(map + results_base, 12, 28, false); - result->pipeline_statistics.ia_vertices += - r600_query_read_result(map + results_base, 14, 30, false); - results_base += query->result_size; - } + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 16, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 18, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 20, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 22, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 24, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 26, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 28, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 30, false); } #if 0 /* for testing */ printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, " @@ -793,23 +861,47 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, default: assert(0); } - - return TRUE; } static boolean r600_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, union pipe_query_result *result) + struct pipe_query *query, boolean wait, + union pipe_query_result *result) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct r600_query *rquery = (struct r600_query *)query; + + return rquery->ops->get_result(rctx, rquery, wait, result); +} + 
+static void r600_query_hw_clear_result(struct r600_query_hw *query, + union pipe_query_result *result) +{ + util_query_clear_result(result, query->b.type); +} + +boolean r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, union pipe_query_result *result) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; struct r600_query_buffer *qbuf; - util_query_clear_result(result, rquery->type); + query->ops->clear_result(query, result); - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) { - if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) { + for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned results_base = 0; + void *map; + + map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, + PIPE_TRANSFER_READ | + (wait ? 0 : PIPE_TRANSFER_DONTBLOCK)); + if (!map) return FALSE; + + while (results_base != qbuf->results_end) { + query->ops->add_result(rctx, query, map + results_base, + result); + results_base += query->result_size; } } @@ -827,7 +919,7 @@ static void r600_render_condition(struct pipe_context *ctx, uint mode) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_query *rquery = (struct r600_query*)query; + struct r600_query_hw *rquery = (struct r600_query_hw *)query; struct r600_query_buffer *qbuf; struct r600_atom *atom = &rctx->render_cond_atom; @@ -837,8 +929,10 @@ static void r600_render_condition(struct pipe_context *ctx, /* Compute the size of SET_PREDICATION packets. 
*/ atom->num_dw = 0; - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) - atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + if (query) { + for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) + atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + } rctx->set_atom_dirty(rctx, atom, query != NULL); } @@ -847,10 +941,10 @@ static void r600_suspend_queries(struct r600_common_context *ctx, struct list_head *query_list, unsigned *num_cs_dw_queries_suspend) { - struct r600_query *query; + struct r600_query_hw *query; LIST_FOR_EACH_ENTRY(query, query_list, list) { - r600_emit_query_end(ctx, query); + r600_query_hw_emit_stop(ctx, query); } assert(*num_cs_dw_queries_suspend == 0); } @@ -870,19 +964,19 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx) static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx, struct list_head *query_list) { - struct r600_query *query; + struct r600_query_hw *query; unsigned num_dw = 0; LIST_FOR_EACH_ENTRY(query, query_list, list) { /* begin + end */ - num_dw += query->num_cs_dw * 2; + num_dw += query->num_cs_dw_begin + query->num_cs_dw_end; /* Workaround for the fact that * num_cs_dw_nontimer_queries_suspend is incremented for every * resumed query, which raises the bar in need_cs_space for * queries about to be resumed. 
*/ - num_dw += query->num_cs_dw; + num_dw += query->num_cs_dw_end; } /* primitives generated query */ num_dw += ctx->streamout.enable_atom.num_dw; @@ -896,7 +990,7 @@ static void r600_resume_queries(struct r600_common_context *ctx, struct list_head *query_list, unsigned *num_cs_dw_queries_suspend) { - struct r600_query *query; + struct r600_query_hw *query; unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list); assert(*num_cs_dw_queries_suspend == 0); @@ -905,7 +999,7 @@ static void r600_resume_queries(struct r600_common_context *ctx, ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE); LIST_FOR_EACH_ENTRY(query, query_list, list) { - r600_emit_query_begin(ctx, query); + r600_query_hw_emit_start(ctx, query); } } @@ -1002,6 +1096,76 @@ err: return; } +#define X(name_, query_type_, type_, result_type_) \ + { \ + .name = name_, \ + .query_type = R600_QUERY_##query_type_, \ + .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ + .group_id = ~(unsigned)0 \ + } + +static struct pipe_driver_query_info r600_driver_query_list[] = { + X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE), + X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE), + X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE), + X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), + X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), + X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), + X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, CUMULATIVE), + X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), + X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), + X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), + X("GPU-load", GPU_LOAD, UINT64, AVERAGE), + X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), + X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE), + X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE), +}; + +#undef X + +static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) +{ + if 
(rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) + return Elements(r600_driver_query_list); + else if (rscreen->info.drm_major == 3) + return Elements(r600_driver_query_list) - 3; + else + return Elements(r600_driver_query_list) - 4; +} + +static int r600_get_driver_query_info(struct pipe_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + unsigned num_queries = r600_get_num_queries(rscreen); + + if (!info) + return num_queries; + + if (index >= num_queries) + return 0; + + *info = r600_driver_query_list[index]; + + switch (info->query_type) { + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_VRAM_USAGE: + info->max_value.u64 = rscreen->info.vram_size; + break; + case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_GTT_USAGE: + info->max_value.u64 = rscreen->info.gart_size; + break; + case R600_QUERY_GPU_TEMPERATURE: + info->max_value.u64 = 125; + break; + } + + return 1; +} + void r600_query_init(struct r600_common_context *rctx) { rctx->b.create_query = r600_create_query; @@ -1017,3 +1181,8 @@ void r600_query_init(struct r600_common_context *rctx) LIST_INITHEAD(&rctx->active_nontimer_queries); LIST_INITHEAD(&rctx->active_timer_queries); } + +void r600_init_screen_query_functions(struct r600_common_screen *rscreen) +{ + rscreen->b.get_driver_query_info = r600_get_driver_query_info; +} diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h new file mode 100644 index 00000000000..0ea5707ca45 --- /dev/null +++ b/src/gallium/drivers/radeon/r600_query.h @@ -0,0 +1,136 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Nicolai Hähnle <[email protected]> + * + */ + +#ifndef R600_QUERY_H +#define R600_QUERY_H + +#include "pipe/p_defines.h" +#include "util/list.h" + +struct r600_common_context; +struct r600_query; +struct r600_query_hw; +struct r600_resource; + +#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) +#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1) +#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2) +#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3) +#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4) +#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) +#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12) +#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13) +#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100) + +struct r600_query_ops { + void (*destroy)(struct r600_common_context *, struct r600_query *); + boolean (*begin)(struct r600_common_context *, struct r600_query *); + void (*end)(struct r600_common_context *, struct r600_query *); + boolean (*get_result)(struct r600_common_context *, + struct r600_query *, boolean wait, + union pipe_query_result *result); +}; + +struct r600_query { + struct r600_query_ops *ops; + + /* The type of query */ + unsigned type; +}; + +enum { + R600_QUERY_HW_FLAG_NO_START = (1 << 0), + R600_QUERY_HW_FLAG_TIMER = (1 << 1), + R600_QUERY_HW_FLAG_PREDICATE = (1 << 2), +}; + +struct r600_query_hw_ops { + void (*prepare_buffer)(struct r600_common_context *, + struct 
r600_query_hw *, + struct r600_resource *); + void (*emit_start)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*emit_stop)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); + void (*add_result)(struct r600_common_context *ctx, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +}; + +struct r600_query_buffer { + /* The buffer where query results are stored. */ + struct r600_resource *buf; + /* Offset of the next free result after current query data */ + unsigned results_end; + /* If a query buffer is full, a new buffer is created and the old one + * is put in here. When we calculate the result, we sum up the samples + * from all buffers. */ + struct r600_query_buffer *previous; +}; + +struct r600_query_hw { + struct r600_query b; + struct r600_query_hw_ops *ops; + unsigned flags; + + /* The query buffer and how many results are in it. */ + struct r600_query_buffer buffer; + /* Size of the result in memory for both begin_query and end_query, + * this can be one or two numbers, or it could even be a size of a structure. */ + unsigned result_size; + /* The number of dwords for begin_query or end_query. 
*/ + unsigned num_cs_dw_begin; + unsigned num_cs_dw_end; + /* Linked list of queries */ + struct list_head list; + /* For transform feedback: which stream the query is for */ + unsigned stream; +}; + +boolean r600_query_hw_init(struct r600_common_context *rctx, + struct r600_query_hw *query); +void r600_query_hw_destroy(struct r600_common_context *rctx, + struct r600_query *rquery); +boolean r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery); +void r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery); +boolean r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + boolean wait, + union pipe_query_result *result); + +#endif /* R600_QUERY_H */ diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 0dac6fbbdce..8a60441c056 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -49,6 +49,7 @@ #define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) #define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) #define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) +#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) /** * flush commands to the hardware @@ -405,7 +406,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, enc->use_vm = true; if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42)) enc->use_vui = true; - if (rscreen->info.family >= CHIP_TONGA) + if (rscreen->info.family >= CHIP_TONGA && + rscreen->info.family != CHIP_STONEY) enc->dual_pipe = true; /* TODO enable B frame with dual instance */ if ((rscreen->info.family >= CHIP_TONGA) && @@ -478,6 +480,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, radeon_vce_50_init(enc); break; + case FW_52_0_3: + radeon_vce_52_init(enc); + break; + default: goto error; } @@ -500,11 +506,17 @@ error: */ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen) { 
- return rscreen->info.vce_fw_version == FW_40_2_2 || - rscreen->info.vce_fw_version == FW_50_0_1 || - rscreen->info.vce_fw_version == FW_50_1_2 || - rscreen->info.vce_fw_version == FW_50_10_2 || - rscreen->info.vce_fw_version == FW_50_17_3; + switch (rscreen->info.vce_fw_version) { + case FW_40_2_2: + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + case FW_52_0_3: + return true; + default: + return false; + } } /** diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 624bda479f8..25e2133521f 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -140,4 +140,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc); /* init vce fw 50 specific callbacks */ void radeon_vce_50_init(struct rvce_encoder *enc); +/* init vce fw 52 specific callbacks */ +void radeon_vce_52_init(struct rvce_encoder *enc); + #endif diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c new file mode 100644 index 00000000000..fbae1f97f41 --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_vce_52.c @@ -0,0 +1,242 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_vce.h" + +static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 }; + +static void create(struct rvce_encoder *enc) +{ + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(0x00000000); // encUseCircularBuffer + RVCE_CS(profiles[enc->base.profile - + PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(0x00000000); // encPicStructRestriction + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch + RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw + RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO + + RVCE_CS(0x00000000); // encPreEncodeContextBufferOffset + RVCE_CS(0x00000000); // encPreEncodeInputLumaBufferOffset + RVCE_CS(0x00000000); // encPreEncodeInputChromaBufferOffs + RVCE_CS(0x00000000); // encPreEncodeMode|ChromaFlag|VBAQMode|SceneChangeSensitivity + RVCE_END(); +} + +static void encode(struct rvce_encoder *enc) +{ + signed luma_offset, 
chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = enc->cpb.res->buf->size - + RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->pic.frame_num ? 
0x0 : 0x11); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch + RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch + if (enc->dual_pipe) + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + else + RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // 
encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = l0_slot(enc); + rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = l1_slot(enc); + rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); + 
RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + + RVCE_CS(0x00000000); // aq_variance_en + RVCE_CS(0x00000000); // aq_block_size + RVCE_CS(0x00000000); // aq_mb_variance_sel + RVCE_CS(0x00000000); // aq_frame_variance_sel + RVCE_CS(0x00000000); // aq_param_a + RVCE_CS(0x00000000); // aq_param_b + RVCE_CS(0x00000000); // aq_param_c + RVCE_CS(0x00000000); // aq_param_d + RVCE_CS(0x00000000); // aq_param_e + + RVCE_CS(0x00000000); // contextInSFB + RVCE_END(); +} + +void radeon_vce_52_init(struct rvce_encoder *enc) +{ + radeon_vce_50_init(enc); + + enc->create = create; + enc->encode = encode; +} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 93847d5ec2f..209b940aa11 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3256,25 +3256,34 @@ si_write_harvested_raster_configs(struct si_context *sctx, } } - /* GRBM_GFX_INDEX is privileged on VI */ - if (sctx->b.chip_class <= CIK) + /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ + if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, GRBM_GFX_INDEX, SE_INDEX(se) | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); + else + si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, + 
S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | + S_030800_INSTANCE_BROADCAST_WRITES(1)); si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } - /* GRBM_GFX_INDEX is privileged on VI */ - if (sctx->b.chip_class <= CIK) + /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ + if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, GRBM_GFX_INDEX, SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); + else + si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, + S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | + S_030800_INSTANCE_BROADCAST_WRITES(1)); } static void si_init_config(struct si_context *sctx) { + struct si_screen *sscreen = sctx->screen; unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; unsigned raster_config, raster_config_1; @@ -3345,9 +3354,14 @@ static void si_init_config(struct si_context *sctx) raster_config_1 = 0x0000002e; break; case CHIP_FIJI: - /* Fiji should be same as Hawaii, but that causes corruption in some cases */ - raster_config = 0x16000012; /* 0x3a00161a */ - raster_config_1 = 0x0000002a; /* 0x0000002e */ + if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { + /* old kernels with old tiling config */ + raster_config = 0x16000012; + raster_config_1 = 0x0000002a; + } else { + raster_config = 0x3a00161a; + raster_config_1 = 0x0000002e; + } break; case CHIP_TONGA: raster_config = 0x16000012; diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc index 6455f3caa3d..bd3c2eead16 100644 --- a/src/gallium/drivers/softpipe/Automake.inc +++ b/src/gallium/drivers/softpipe/Automake.inc @@ -3,13 +3,10 @@ if HAVE_GALLIUM_SOFTPIPE TARGET_DRIVERS += swrast TARGET_CPPFLAGS += -DGALLIUM_SOFTPIPE TARGET_LIB_DEPS += \ - 
$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \ $(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la -if HAVE_DRI2 +if HAVE_DRISW_KMS TARGET_DRIVERS += kms_swrast -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la endif endif diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index bcce18a3502..6a4f9d8d076 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -51,14 +51,16 @@ #define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3) #define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4) #define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5) +#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6) + /* running total counters */ -#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 6) -#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 7) -#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 8) -#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 9) -#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11) /*SVGA_QUERY_MAX has to be last because it is size of an array*/ -#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 12) /** * Maximum supported number of constant buffers per shader @@ -485,6 +487,7 @@ struct svga_context uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ + uint64_t 
num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ } hud; /** The currently bound stream output targets */ diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c index 28b8064bf70..2b549dfa5bb 100644 --- a/src/gallium/drivers/svga/svga_format.c +++ b/src/gallium/drivers/svga/svga_format.c @@ -53,17 +53,17 @@ static const struct vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, 0 }, - { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A4R4G4B4, 0 }, + { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, 0 }, { PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, 0 }, - { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_LUMINANCE8, 0 }, + { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, 0 }, { PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, - { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_Z_D16, 0 }, + { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 }, { PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 }, @@ -152,14 +152,14 @@ static const struct 
vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, 0 }, - { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 }, - { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 }, - { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 }, - { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 }, - { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 }, + { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 }, + { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 }, + { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 }, + { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 }, + { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 }, { PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 }, { PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 }, { PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 }, @@ -326,6 +326,34 @@ static const struct vgpu10_format_entry format_conversion_table[] = { PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + 
{ PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { 
PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; @@ -472,7 +500,7 @@ struct format_cap { * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling. * If we want to query if a format supports both rendering and sampling the - * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and + * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and * SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those * formats and report that both can do rendering and sampling. */ @@ -1410,27 +1438,50 @@ static const struct format_cap format_cap_table[] = { }, { "SVGA3D_BC1_TYPELESS", - SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC1_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC1_UNORM_SRGB", - SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC1_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_TYPELESS", - SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC2_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_UNORM_SRGB", - SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC2_UNORM_SRGB, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_TYPELESS", - SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0 + SVGA3D_BC3_TYPELESS, + SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_UNORM_SRGB", - SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0 + SVGA3D_BC3_UNORM_SRGB, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + 
SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC4_TYPELESS", @@ -1671,7 +1722,7 @@ static const struct format_cap format_cap_table[] = { { "SVGA3D_D16_UNORM", SVGA3D_D16_UNORM, - SVGA3D_DEVCAP_DXFMT_D16_UNORM, + 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/ 1, 1, 2, SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_CUBETEXTURE | @@ -1690,15 +1741,27 @@ static const struct format_cap format_cap_table[] = { }, { "SVGA3D_BC1_UNORM", - SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC1_UNORM, + SVGA3D_DEVCAP_DXFMT_BC1_UNORM, + 4, 4, 8, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC2_UNORM", - SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC2_UNORM, + SVGA3D_DEVCAP_DXFMT_BC2_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_BC3_UNORM", - SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0 + SVGA3D_BC3_UNORM, + SVGA3D_DEVCAP_DXFMT_BC3_UNORM, + 4, 4, 16, + SVGA3DFORMAT_OP_TEXTURE | + SVGA3DFORMAT_OP_CUBETEXTURE }, { "SVGA3D_B5G6R5_UNORM", @@ -2053,6 +2116,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_R8G8_UINT: case SVGA3D_R8G8_SINT: return SVGA3D_R8G8_TYPELESS; + case SVGA3D_D16_UNORM: case SVGA3D_R16_UNORM: case SVGA3D_R16_UINT: case SVGA3D_R16_SNORM: @@ -2070,6 +2134,15 @@ svga_typeless_format(SVGA3dSurfaceFormat format) case SVGA3D_B8G8R8X8_UNORM_SRGB: case SVGA3D_B8G8R8X8_UNORM: return SVGA3D_B8G8R8X8_TYPELESS; + case SVGA3D_BC1_UNORM: + case SVGA3D_BC1_UNORM_SRGB: + return SVGA3D_BC1_TYPELESS; + case SVGA3D_BC2_UNORM: + case SVGA3D_BC2_UNORM_SRGB: + return SVGA3D_BC2_TYPELESS; + case SVGA3D_BC3_UNORM: + case SVGA3D_BC3_UNORM_SRGB: + return SVGA3D_BC3_TYPELESS; case SVGA3D_BC4_UNORM: case SVGA3D_BC4_SNORM: return SVGA3D_BC4_TYPELESS; @@ -2079,18 +2152,10 @@ svga_typeless_format(SVGA3dSurfaceFormat format) /* Special cases (no corresponding _TYPELESS formats) */ case SVGA3D_A8_UNORM: - case SVGA3D_A4R4G4B4: case SVGA3D_B5G5R5A1_UNORM: case SVGA3D_B5G6R5_UNORM: - case SVGA3D_DXT1: - case SVGA3D_DXT2: - case SVGA3D_DXT3: - case 
SVGA3D_DXT4: - case SVGA3D_DXT5: case SVGA3D_R11G11B10_FLOAT: case SVGA3D_R9G9B9E5_SHAREDEXP: - case SVGA3D_Z_D32: - case SVGA3D_Z_D16: return format; default: debug_printf("Unexpected format %s in %s\n", @@ -2098,3 +2163,26 @@ svga_typeless_format(SVGA3dSurfaceFormat format) return format; } } + + +/** + * Given a surface format, return the corresponding format to use for + * a texture sampler. In most cases, it's the format unchanged, but there + * are some special cases. + */ +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format) +{ + switch (format) { + case SVGA3D_D16_UNORM: + return SVGA3D_R16_UNORM; + case SVGA3D_D24_UNORM_S8_UINT: + return SVGA3D_R24_UNORM_X8_TYPELESS; + case SVGA3D_D32_FLOAT: + return SVGA3D_R32_FLOAT; + case SVGA3D_D32_FLOAT_S8X24_UINT: + return SVGA3D_R32_FLOAT_X8X24_TYPELESS; + default: + return format; + } +} diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h index 0af218cb01a..9f9a530d473 100644 --- a/src/gallium/drivers/svga/svga_format.h +++ b/src/gallium/drivers/svga/svga_format.h @@ -93,4 +93,8 @@ SVGA3dSurfaceFormat svga_typeless_format(SVGA3dSurfaceFormat format); +SVGA3dSurfaceFormat +svga_sampler_format(SVGA3dSurfaceFormat format); + + #endif /* SVGA_FORMAT_H_ */ diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 8b9818334ca..5416a009dcb 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -731,6 +731,7 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_MAP_BUFFER_TIME: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: break; default: assert(!"unexpected query type in svga_create_query()"); @@ -797,6 +798,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_MAP_BUFFER_TIME: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case 
SVGA_QUERY_NUM_BYTES_UPLOADED: /* nothing */ break; default: @@ -876,6 +878,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_RESOURCES_MAPPED: sq->begin_count = svga->hud.num_resources_mapped; break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->begin_count = svga->hud.num_bytes_uploaded; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -966,6 +971,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_RESOURCES_MAPPED: sq->end_count = svga->hud.num_resources_mapped; break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->end_count = svga->hud.num_bytes_uploaded; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -1061,6 +1069,7 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_FLUSHES: case SVGA_QUERY_NUM_VALIDATIONS: case SVGA_QUERY_NUM_RESOURCES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: case SVGA_QUERY_MAP_BUFFER_TIME: vresult->u64 = sq->end_count - sq->begin_count; break; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 71f2f4f2779..449cc149a81 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -80,6 +80,11 @@ svga_buffer_transfer_map(struct pipe_context *pipe, uint8_t *map; int64_t begin = os_time_get(); + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); + transfer = CALLOC_STRUCT(pipe_transfer); if (transfer == NULL) { return NULL; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 69e5f75e208..8c5cff5abc1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -429,6 +429,8 @@ svga_buffer_upload_flush(struct svga_context *svga, 
assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } else { @@ -454,6 +456,8 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); + + svga->hud.num_bytes_uploaded += box->w; } } diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index a02d1e495ff..81594777258 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -380,6 +380,12 @@ svga_texture_transfer_map(struct pipe_context *pipe, break; } + if (usage & PIPE_TRANSFER_WRITE) { + /* record texture upload for HUD */ + svga->hud.num_bytes_uploaded += + nblocksx * nblocksy * d * util_format_get_blocksize(texture->format); + } + if (!use_direct_map) { /* Use a DMA buffer */ st->hw_nblocksy = nblocksy; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index a80bc9b9119..09a3d33552b 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -789,6 +789,8 @@ svga_get_driver_query_info(struct pipe_screen *screen, {"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS}, {"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}}, + {"num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, {0}, + PIPE_DRIVER_QUERY_TYPE_BYTES, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE}, /* running total counters */ {"memory-used", SVGA_QUERY_MEMORY_USED, {0}, diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c index 611d2c6102f..c5d52bbfd14 100644 --- a/src/gallium/drivers/svga/svga_state_sampler.c +++ b/src/gallium/drivers/svga/svga_state_sampler.c @@ -108,6 +108,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, PIPE_BIND_SAMPLER_VIEW); assert(format != SVGA3D_FORMAT_INVALID); + /* 
Convert the format to a sampler-friendly format, if needed */ + format = svga_sampler_format(format); + if (texture->target == PIPE_BUFFER) { viewDesc.buffer.firstElement = sv->base.u.buf.first_element; viewDesc.buffer.numElements = (sv->base.u.buf.last_element - diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 8b02680c77e..62a51e9a94d 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -456,9 +456,6 @@ trace_screen_create(struct pipe_screen *screen) { struct trace_screen *tr_scr; - if(!screen) - goto error1; - if (!trace_enabled()) goto error1; diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc index 6fa3e190cac..5664c2ab14e 100644 --- a/src/gallium/drivers/vc4/Automake.inc +++ b/src/gallium/drivers/vc4/Automake.inc @@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \ $(top_builddir)/src/gallium/drivers/vc4/libvc4.la -if USE_VC4_SIMULATOR -TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR -endif - endif diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index f4a57ba3404..a3bf72fc72a 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -23,7 +23,6 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc if USE_VC4_SIMULATOR -SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1 SIM_LDFLAGS = -lsimpenrose endif diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 373c9e12d11..0672a92226f 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); nir_ssa_def *src_color[4], *unpacked_dst_color[4]; for (unsigned i = 0; i < 4; i++) { - src_color[i] = nir_swizzle(b, 
intr->src[0].ssa, &i, 1, false); - unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false); + src_color[i] = nir_channel(b, intr->src[0].ssa, i); + unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); } vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 7ea263afb68..1afe52a63f4 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan) static nir_ssa_def * vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan) { - return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false); + return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan); } static nir_ssa_def * @@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, intr_comp->const_index[0] = intr->const_index[0] * 4 + i; assert(intr->src[0].is_ssa); - intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b, - intr->src[0].ssa, - &i, 1, false)); + intr_comp->src[0] = + nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); nir_builder_instr_insert(b, &intr_comp->instr); } diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index f1bab810eff..07a92266dd2 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: if (is_zero(c, inst->src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. 
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a48dad804e2..197577b6c20 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0)); break; + case nir_op_uge: + qir_SF(c, qir_SUB(c, src[0], src[1])); + *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0)); + break; case nir_op_ilt: qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0)); @@ -1167,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c) struct qreg point_size; if (c->output_point_size_index != -1) - point_size = c->outputs[c->output_point_size_index + 3]; + point_size = c->outputs[c->output_point_size_index]; else point_size = qir_uniform_f(c, 1.0); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 7894b081b19..f2855e159fc 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = { [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true }, [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true }, [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true }, + [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true }, + [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true }, [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true }, [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true }, [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true }, [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true }, + [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true }, + [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true }, [QOP_RCP] = { "rcp", 1, 1, false, true }, [QOP_RSQ] = { "rsq", 1, 1, false, true }, @@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst) case QOP_SEL_X_0_NC: case QOP_SEL_X_0_ZS: 
case QOP_SEL_X_0_ZC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: case QOP_SEL_X_Y_ZS: case QOP_SEL_X_Y_ZC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: return true; default: return false; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index a92ad93ee07..ddb35e41fcf 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -99,11 +99,15 @@ enum qop { QOP_SEL_X_0_ZC, QOP_SEL_X_0_NS, QOP_SEL_X_0_NC, + QOP_SEL_X_0_CS, + QOP_SEL_X_0_CC, /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ QOP_SEL_X_Y_ZS, QOP_SEL_X_Y_ZC, QOP_SEL_X_Y_NS, QOP_SEL_X_Y_NC, + QOP_SEL_X_Y_CS, + QOP_SEL_X_Y_CC, QOP_FTOI, QOP_ITOF, @@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) QIR_ALU1(SEL_X_0_NS) QIR_ALU1(SEL_X_0_NC) +QIR_ALU1(SEL_X_0_CS) +QIR_ALU1(SEL_X_0_CC) QIR_ALU2(SEL_X_Y_ZS) QIR_ALU2(SEL_X_Y_ZC) QIR_ALU2(SEL_X_Y_NS) QIR_ALU2(SEL_X_Y_NC) +QIR_ALU2(SEL_X_Y_CS) +QIR_ALU2(SEL_X_Y_CC) QIR_ALU2(FMIN) QIR_ALU2(FMAX) QIR_ALU2(FMINABS) diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index f087c3b81b5..a57e100593c 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -22,14 +22,10 @@ */ /** - * @file vc4_opt_algebraic.c + * @file vc4_qir_lower_uniforms.c * - * This is the optimization pass for miscellaneous changes to instructions - * where we can simplify the operation by some knowledge about the specific - * operations. - * - * Mostly this will be a matter of turning things into MOVs so that they can - * later be copy-propagated out. + * This is the pre-code-generation pass for fixing up instructions that try to + * read from multiple uniform values. 
*/ #include "vc4_qir.h" @@ -85,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i) return true; } +/* Returns the number of different uniform values referenced by the + * instruction. + */ +static uint32_t +qir_get_instruction_uniform_count(struct qinst *inst) +{ + uint32_t count = 0; + + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file != QFILE_UNIF) + continue; + + bool is_duplicate = false; + for (int j = 0; j < i; j++) { + if (inst->src[j].file == QFILE_UNIF && + inst->src[j].index == inst->src[i].index) { + is_duplicate = true; + break; + } + } + if (!is_duplicate) + count++; + } + + return count; +} + void qir_lower_uniforms(struct vc4_compile *c) { @@ -98,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } - - if (count <= 1) + if (qir_get_instruction_uniform_count(inst) <= 1) continue; for (int i = 0; i < nsrc; i++) { @@ -140,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } + uint32_t count = qir_get_instruction_uniform_count(inst); if (count <= 1) continue; + bool removed = false; for (int i = 0; i < nsrc; i++) { if (is_lowerable_uniform(inst, i) && inst->src[i].index == max_index) { inst->src[i] = temp; remove_uniform(ht, unif); - count--; + removed = true; } } + if (removed) + count--; /* If the instruction doesn't need lowering any more, * then drop it from the list. 
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 133e1385178..e0d3633da42 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_0_ZC: case QOP_SEL_X_0_NS: case QOP_SEL_X_0_NC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: queue(c, qpu_a_MOV(dst, src[0]) | unpack); set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + QPU_COND_ZS); @@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: queue(c, qpu_a_MOV(dst, src[0])); if (qinst->src[0].pack) *(last_inst(c)) |= unpack; diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c index 7f11fba2340..85a0c95e851 100644 --- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c) uint32_t next_uniform = 0; list_for_each_entry(struct qinst, inst, &c->instructions, link) { + uint32_t new = ~0; + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file != QFILE_UNIF) continue; - uint32_t new = next_uniform++; - if (uniform_index_size <= new) { - uniform_index_size = - MAX2(uniform_index_size * 2, 16); - uniform_index = - realloc(uniform_index, - uniform_index_size * - sizeof(uint32_t)); + if (new == ~0) { + new = next_uniform++; + if (uniform_index_size <= new) { + uniform_index_size = + MAX2(uniform_index_size * 2, 16); + uniform_index = + realloc(uniform_index, + uniform_index_size * + sizeof(uint32_t)); + } + } else { + /* If we've got two uniform references in this + * instruction, they need to be the same + * uniform value. 
+ */ + assert(inst->src[i].index == uniform_index[new]); } uniform_index[new] = inst->src[i].index; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 27f358f8fb9..be7447de67d 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -116,6 +116,25 @@ struct pipe_context { unsigned query_type, unsigned index ); + /** + * Create a query object that queries all given query types simultaneously. + * + * This can only be used for those query types for which + * get_driver_query_info indicates that it must be used. Only one batch + * query object may be active at a time. + * + * There may be additional constraints on which query types can be used + * together, in particular those that are implied by + * get_driver_query_group_info. + * + * \param num_queries the number of query types + * \param query_types array of \p num_queries query types + * \return a query object, or NULL on error. + */ + struct pipe_query *(*create_batch_query)( struct pipe_context *pipe, + unsigned num_queries, + unsigned *query_types ); + void (*destroy_query)(struct pipe_context *pipe, struct pipe_query *q); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 7240154727e..b3c8b9f7360 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics }; /** + * For batch queries. + */ +union pipe_numeric_type_union +{ + uint64_t u64; + uint32_t u32; + float f; +}; + +/** * Query result (returned by pipe_context::get_query_result). 
*/ union pipe_query_result @@ -791,6 +801,8 @@ union pipe_query_result /* PIPE_QUERY_PRIMITIVES_GENERATED */ /* PIPE_QUERY_PRIMITIVES_EMITTED */ /* PIPE_DRIVER_QUERY_TYPE_UINT64 */ + /* PIPE_DRIVER_QUERY_TYPE_BYTES */ + /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ /* PIPE_DRIVER_QUERY_TYPE_HZ */ uint64_t u64; @@ -809,6 +821,9 @@ union pipe_query_result /* PIPE_QUERY_PIPELINE_STATISTICS */ struct pipe_query_data_pipeline_statistics pipeline_statistics; + + /* batch queries */ + union pipe_numeric_type_union batch[0]; }; union pipe_color_union @@ -829,12 +844,6 @@ enum pipe_driver_query_type PIPE_DRIVER_QUERY_TYPE_HZ = 6, }; -enum pipe_driver_query_group_type -{ - PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0, - PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1, -}; - /* Whether an average value per frame or a cumulative value should be * displayed. */ @@ -844,12 +853,13 @@ enum pipe_driver_query_result_type PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1, }; -union pipe_numeric_type_union -{ - uint64_t u64; - uint32_t u32; - float f; -}; +/** + * Some hardware requires some hardware-specific queries to be submitted + * as batched queries. The corresponding query objects are created using + * create_batch_query, and at most one such query may be active at + * any time. 
+ */ +#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0) struct pipe_driver_query_info { @@ -859,12 +869,12 @@ struct pipe_driver_query_info enum pipe_driver_query_type type; enum pipe_driver_query_result_type result_type; unsigned group_id; + unsigned flags; }; struct pipe_driver_query_group_info { const char *name; - enum pipe_driver_query_group_type type; unsigned max_active_queries; unsigned num_queries; }; diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 5f0690e5ae6..d9c9f9b5cc2 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -359,6 +359,36 @@ enum pipe_format { PIPE_FORMAT_ETC2_RG11_UNORM = 277, PIPE_FORMAT_ETC2_RG11_SNORM = 278, + PIPE_FORMAT_ASTC_4x4 = 279, + PIPE_FORMAT_ASTC_5x4 = 280, + PIPE_FORMAT_ASTC_5x5 = 281, + PIPE_FORMAT_ASTC_6x5 = 282, + PIPE_FORMAT_ASTC_6x6 = 283, + PIPE_FORMAT_ASTC_8x5 = 284, + PIPE_FORMAT_ASTC_8x6 = 285, + PIPE_FORMAT_ASTC_8x8 = 286, + PIPE_FORMAT_ASTC_10x5 = 287, + PIPE_FORMAT_ASTC_10x6 = 288, + PIPE_FORMAT_ASTC_10x8 = 289, + PIPE_FORMAT_ASTC_10x10 = 290, + PIPE_FORMAT_ASTC_12x10 = 291, + PIPE_FORMAT_ASTC_12x12 = 292, + + PIPE_FORMAT_ASTC_4x4_SRGB = 293, + PIPE_FORMAT_ASTC_5x4_SRGB = 294, + PIPE_FORMAT_ASTC_5x5_SRGB = 295, + PIPE_FORMAT_ASTC_6x5_SRGB = 296, + PIPE_FORMAT_ASTC_6x6_SRGB = 297, + PIPE_FORMAT_ASTC_8x5_SRGB = 298, + PIPE_FORMAT_ASTC_8x6_SRGB = 299, + PIPE_FORMAT_ASTC_8x8_SRGB = 300, + PIPE_FORMAT_ASTC_10x5_SRGB = 301, + PIPE_FORMAT_ASTC_10x6_SRGB = 302, + PIPE_FORMAT_ASTC_10x8_SRGB = 303, + PIPE_FORMAT_ASTC_10x10_SRGB = 304, + PIPE_FORMAT_ASTC_12x10_SRGB = 305, + PIPE_FORMAT_ASTC_12x12_SRGB = 306, + PIPE_FORMAT_COUNT }; diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h index 740c4bbe1a6..959a7625e30 100644 --- a/src/gallium/include/state_tracker/drm_driver.h +++ b/src/gallium/include/state_tracker/drm_driver.h @@ -117,10 +117,4 @@ struct drm_driver_descriptor 
driver_descriptor = { \ .configuration = (conf), \ }; -extern struct pipe_screen *dd_create_screen(int fd); - -extern const char *dd_driver_name(void); - -extern const struct drm_conf_ret *dd_configuration(enum drm_conf conf); - #endif diff --git a/src/gallium/include/state_tracker/sw_driver.h b/src/gallium/include/state_tracker/sw_driver.h new file mode 100644 index 00000000000..0eb2b44d6fd --- /dev/null +++ b/src/gallium/include/state_tracker/sw_driver.h @@ -0,0 +1,21 @@ + +#ifndef _SW_DRIVER_H_ +#define _SW_DRIVER_H_ + +#include "pipe/p_compiler.h" + +struct pipe_screen; +struct sw_winsys; + +struct sw_driver_descriptor +{ + struct pipe_screen *(*create_screen)(struct sw_winsys *ws); + struct { + const char * const name; + struct sw_winsys *(*create_winsys)(); + } winsys[]; +}; + +extern struct sw_driver_descriptor swrast_driver_descriptor; + +#endif diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am index fd0ccf88cc5..3c9421692fc 100644 --- a/src/gallium/state_trackers/clover/Makefile.am +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -1,8 +1,6 @@ include Makefile.sources AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/gallium/include \ diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 6efff79c7f4..1be2f6413f4 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -41,7 +41,7 @@ namespace { device::device(clover::platform &platform, pipe_loader_device *ldev) : platform(platform), ldev(ldev) { - pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR); + pipe = pipe_loader_create_screen(ldev); if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) { if (pipe) pipe->destroy(pipe); diff --git a/src/gallium/state_trackers/dri/Android.mk 
b/src/gallium/state_trackers/dri/Android.mk index 43f0de9b464..f0eb18dcacf 100644 --- a/src/gallium/state_trackers/dri/Android.mk +++ b/src/gallium/state_trackers/dri/Android.mk @@ -29,9 +29,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(common_SOURCES) -LOCAL_CFLAGS := \ - -DGALLIUM_STATIC_TARGETS=1 \ - LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am index 9f4deba0c1e..74bccaa6416 100644 --- a/src/gallium/state_trackers/dri/Makefile.am +++ b/src/gallium/state_trackers/dri/Makefile.am @@ -25,8 +25,6 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ @@ -36,15 +34,10 @@ AM_CPPFLAGS = \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 - if HAVE_GALLIUM_SOFTPIPE AM_CPPFLAGS += \ -DGALLIUM_SOFTPIPE endif # HAVE_GALLIUM_SOFTPIPE -endif # HAVE_GALLIUM_STATIC_TARGETS noinst_LTLIBRARIES = libdri.la libdri_la_SOURCES = $(common_SOURCES) diff --git a/src/gallium/state_trackers/dri/SConscript b/src/gallium/state_trackers/dri/SConscript index 657300baf13..fa48fb8a0d7 100644 --- a/src/gallium/state_trackers/dri/SConscript +++ b/src/gallium/state_trackers/dri/SConscript @@ -15,10 +15,6 @@ env.Append(CPPPATH = [ xmlpool_options.dir.dir, # Dir to generated xmlpool/options.h ]) -env.Append(CPPDEFINES = [ - ('GALLIUM_STATIC_TARGETS', '1'), -]) - sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES') # XXX: if HAVE_DRISW diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 019414b56fe..beb0866c83f 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1444,8 +1444,8 @@ 
dri2_init_screen(__DRIscreen * sPriv) const __DRIconfig **configs; struct dri_screen *screen; struct pipe_screen *pscreen = NULL; - const struct drm_conf_ret *throttle_ret = NULL; - const struct drm_conf_ret *dmabuf_ret = NULL; + const struct drm_conf_ret *throttle_ret; + const struct drm_conf_ret *dmabuf_ret; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -1457,19 +1457,14 @@ dri2_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; -#if GALLIUM_STATIC_TARGETS - pscreen = dd_create_screen(screen->fd); + if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd))) + pscreen = pipe_loader_create_screen(screen->dev); - throttle_ret = dd_configuration(DRM_CONF_THROTTLE); - dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD); -#else - if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) { - pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR); + if (!pscreen) + goto fail; - throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE); - dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD); - } -#endif // GALLIUM_STATIC_TARGETS + throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE); + dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD); if (throttle_ret && throttle_ret->val.val_int != -1) { screen->throttling_enabled = TRUE; @@ -1486,20 +1481,14 @@ dri2_init_screen(__DRIscreen * sPriv) } } - if (pscreen && pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { + if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) { sPriv->extensions = dri_robust_screen_extensions; screen->has_reset_status_query = true; } else sPriv->extensions = dri_screen_extensions; - /* dri_init_screen_helper checks pscreen for us */ - -#if GALLIUM_STATIC_TARGETS - configs = dri_init_screen_helper(screen, pscreen, dd_driver_name()); -#else configs = dri_init_screen_helper(screen, pscreen, screen->dev->driver_name); -#endif // GALLIUM_STATIC_TARGETS if (!configs) goto 
fail; @@ -1511,10 +1500,8 @@ dri2_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); -#if !GALLIUM_STATIC_TARGETS if (screen->dev) pipe_loader_release(&screen->dev, 1); -#endif // !GALLIUM_STATIC_TARGETS FREE(screen); return NULL; } @@ -1527,7 +1514,6 @@ fail: static const __DRIconfig ** dri_kms_init_screen(__DRIscreen * sPriv) { -#if GALLIUM_STATIC_TARGETS #if defined(GALLIUM_SOFTPIPE) const __DRIconfig **configs; struct dri_screen *screen; @@ -1543,7 +1529,11 @@ dri_kms_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; - pscreen = kms_swrast_create_screen(screen->fd); + if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd))) + pscreen = pipe_loader_create_screen(screen->dev); + + if (!pscreen) + goto fail; if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 && (cap & DRM_PRIME_CAP_IMPORT)) { @@ -1553,7 +1543,6 @@ dri_kms_init_screen(__DRIscreen * sPriv) sPriv->extensions = dri_screen_extensions; - /* dri_init_screen_helper checks pscreen for us */ configs = dri_init_screen_helper(screen, pscreen, "swrast"); if (!configs) goto fail; @@ -1566,9 +1555,10 @@ dri_kms_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); + if (screen->dev) + pipe_loader_release(&screen->dev, 1); FREE(screen); #endif // GALLIUM_SOFTPIPE -#endif // GALLIUM_STATIC_TARGETS return NULL; } diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index c4c2d9c8fb1..2ac55c88926 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -390,9 +390,7 @@ dri_destroy_screen(__DRIscreen * sPriv) dri_destroy_screen_helper(screen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&screen->dev, 1); -#endif // !GALLIUM_STATIC_TARGETS free(screen); sPriv->driverPrivate = NULL; @@ -416,11 +414,6 @@ dri_init_screen_helper(struct dri_screen *screen, const char* driver_name) { screen->base.screen = pscreen; - if 
(!screen->base.screen) { - debug_printf("%s: failed to create pipe_screen\n", __FUNCTION__); - return NULL; - } - screen->base.get_egl_image = dri_get_egl_image; screen->base.get_param = dri_get_param; diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c index 753c59d696a..b85a73c57d2 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -39,6 +39,7 @@ #include "util/u_inlines.h" #include "util/u_box.h" #include "pipe/p_context.h" +#include "pipe-loader/pipe_loader.h" #include "state_tracker/drisw_api.h" #include "state_tracker/st_context.h" @@ -382,7 +383,7 @@ drisw_init_screen(__DRIscreen * sPriv) { const __DRIconfig **configs; struct dri_screen *screen; - struct pipe_screen *pscreen; + struct pipe_screen *pscreen = NULL; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -396,8 +397,11 @@ drisw_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; sPriv->extensions = drisw_screen_extensions; - pscreen = drisw_create_screen(&drisw_lf); - /* dri_init_screen_helper checks pscreen for us */ + if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf)) + pscreen = pipe_loader_create_screen(screen->dev); + + if (!pscreen) + goto fail; configs = dri_init_screen_helper(screen, pscreen, "swrast"); if (!configs) @@ -406,6 +410,8 @@ drisw_init_screen(__DRIscreen * sPriv) return configs; fail: dri_destroy_screen_helper(screen); + if (screen->dev) + pipe_loader_release(&screen->dev, 1); FREE(screen); return NULL; } diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c index 7df90b16a84..da9ca104d93 100644 --- a/src/gallium/state_trackers/omx/entrypoint.c +++ b/src/gallium/state_trackers/omx/entrypoint.c @@ -33,6 +33,7 @@ #include <assert.h> #include <string.h> +#include <stdbool.h> #include <X11/Xlib.h> @@ -73,29 +74,30 @@ int omx_component_library_Setup(stLoaderComponentType **stComponents) struct vl_screen 
*omx_get_screen(void) { + static bool first_time = true; pipe_mutex_lock(omx_lock); - if (!omx_display) { - omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL); - if (!omx_render_node) { - omx_display = XOpenDisplay(NULL); - if (!omx_display) - goto error; - } - } - if (!omx_screen) { + if (first_time) { + omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL); + first_time = false; + } if (omx_render_node) { drm_fd = loader_open_device(omx_render_node); if (drm_fd < 0) goto error; + omx_screen = vl_drm_screen_create(drm_fd); if (!omx_screen) { close(drm_fd); goto error; } } else { - omx_screen = vl_screen_create(omx_display, 0); + omx_display = XOpenDisplay(NULL); + if (!omx_display) + goto error; + + omx_screen = vl_dri2_screen_create(omx_display, 0); if (!omx_screen) { XCloseDisplay(omx_display); goto error; @@ -117,16 +119,13 @@ void omx_put_screen(void) { pipe_mutex_lock(omx_lock); if ((--omx_usecount) == 0) { - if (!omx_render_node) { - vl_screen_destroy(omx_screen); - if (omx_display) - XCloseDisplay(omx_display); - } else { - close(drm_fd); - vl_drm_screen_destroy(omx_screen); - } + omx_screen->destroy(omx_screen); omx_screen = NULL; - omx_display = NULL; + + if (omx_render_node) + close(drm_fd); + else + XCloseDisplay(omx_display); } pipe_mutex_unlock(omx_lock); } diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index 98c4104da48..f0051e5f6a5 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -102,7 +102,6 @@ PUBLIC VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP ctx) { vlVaDriver *drv; - int drm_fd; struct drm_state *drm_info; if (!ctx) @@ -119,26 +118,20 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx) return VA_STATUS_ERROR_UNIMPLEMENTED; case VA_DISPLAY_GLX: case VA_DISPLAY_X11: - drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen); + drv->vscreen = vl_dri2_screen_create(ctx->native_dpy, ctx->x11_screen); if (!drv->vscreen) goto 
error_screen; break; case VA_DISPLAY_DRM: case VA_DISPLAY_DRM_RENDERNODES: { drm_info = (struct drm_state *) ctx->drm_state; - if (!drm_info) { - FREE(drv); - return VA_STATUS_ERROR_INVALID_PARAMETER; - } - - drm_fd = drm_info->fd; - if (drm_fd < 0) { + if (!drm_info || drm_info->fd < 0) { FREE(drv); return VA_STATUS_ERROR_INVALID_PARAMETER; } - drv->vscreen = vl_drm_screen_create(drm_fd); + drv->vscreen = vl_drm_screen_create(drm_info->fd); if (!drv->vscreen) goto error_screen; } @@ -182,10 +175,7 @@ error_htab: drv->pipe->destroy(drv->pipe); error_pipe: - if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) - vl_screen_destroy(drv->vscreen); - else - vl_drm_screen_destroy(drv->vscreen); + drv->vscreen->destroy(drv->vscreen); error_screen: FREE(drv); @@ -322,10 +312,7 @@ vlVaTerminate(VADriverContextP ctx) vl_compositor_cleanup_state(&drv->cstate); vl_compositor_cleanup(&drv->compositor); drv->pipe->destroy(drv->pipe); - if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11) - vl_screen_destroy(drv->vscreen); - else - vl_drm_screen_destroy(drv->vscreen); + drv->vscreen->destroy(drv->vscreen); handle_table_destroy(drv->htab); FREE(drv); diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c index 5e7841a0521..a37a9b791db 100644 --- a/src/gallium/state_trackers/va/picture.c +++ b/src/gallium/state_trackers/va/picture.c @@ -763,7 +763,7 @@ handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, v dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width; dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height; - dirty_area = vl_screen_get_dirty_area(drv->vscreen); + dirty_area = drv->vscreen->get_dirty_area(drv->vscreen); vl_compositor_clear_layers(&drv->cstate); vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE); diff 
--git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 589d6860b6a..c052c8f2284 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -229,6 +229,7 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s struct pipe_screen *screen; struct pipe_resource *tex; struct pipe_surface surf_templ, *surf_draw; + struct vl_screen *vscreen; struct u_rect src_rect, *dirty_area; struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth}; VAStatus status; @@ -242,17 +243,18 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s return VA_STATUS_ERROR_INVALID_SURFACE; screen = drv->pipe->screen; + vscreen = drv->vscreen; if(surf->fence) { screen->fence_finish(screen, surf->fence, PIPE_TIMEOUT_INFINITE); screen->fence_reference(screen, &surf->fence, NULL); } - tex = vl_screen_texture_from_drawable(drv->vscreen, (Drawable)draw); + tex = vscreen->texture_from_drawable(vscreen, draw); if (!tex) return VA_STATUS_ERROR_INVALID_DISPLAY; - dirty_area = vl_screen_get_dirty_area(drv->vscreen); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -276,11 +278,8 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s if (status) return status; - screen->flush_frontbuffer - ( - screen, tex, 0, 0, - vl_screen_get_private(drv->vscreen), NULL - ); + screen->flush_frontbuffer(screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); screen->fence_reference(screen, &surf->fence, NULL); drv->pipe->flush(drv->pipe, &surf->fence, 0); diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c index 31c95054f56..c70cc6e2752 100644 --- a/src/gallium/state_trackers/vdpau/device.c +++ b/src/gallium/state_trackers/vdpau/device.c @@ -63,7 +63,7 @@ vdp_imp_device_create_x11(Display *display, int screen, 
VdpDevice *device, pipe_reference_init(&dev->reference, 1); - dev->vscreen = vl_screen_create(display, screen); + dev->vscreen = vl_dri2_screen_create(display, screen); if (!dev->vscreen) { ret = VDP_STATUS_RESOURCES; goto no_vscreen; @@ -136,7 +136,7 @@ no_handle: no_resource: dev->context->destroy(dev->context); no_context: - vl_screen_destroy(dev->vscreen); + dev->vscreen->destroy(dev->vscreen); no_vscreen: FREE(dev); no_dev: @@ -227,7 +227,7 @@ vlVdpDeviceFree(vlVdpDevice *dev) vl_compositor_cleanup(&dev->compositor); pipe_sampler_view_reference(&dev->dummy_sv, NULL); dev->context->destroy(dev->context); - vl_screen_destroy(dev->vscreen); + dev->vscreen->destroy(dev->vscreen); FREE(dev); vlDestroyHTAB(); } diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index e53303708b2..e7f387e6173 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -186,7 +186,8 @@ vlVdpPresentationQueueGetTime(VdpPresentationQueue presentation_queue, return VDP_STATUS_INVALID_HANDLE; pipe_mutex_lock(pq->device->mutex); - *current_time = vl_screen_get_timestamp(pq->device->vscreen, pq->drawable); + *current_time = pq->device->vscreen->get_timestamp(pq->device->vscreen, + (void *)pq->drawable); pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_OK; @@ -214,6 +215,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, struct vl_compositor *compositor; struct vl_compositor_state *cstate; + struct vl_screen *vscreen; pq = vlGetDataHTAB(presentation_queue); if (!pq) @@ -226,15 +228,16 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, pipe = pq->device->context; compositor = &pq->device->compositor; cstate = &pq->cstate; + vscreen = pq->device->vscreen; pipe_mutex_lock(pq->device->mutex); - tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable); + tex = vscreen->texture_from_drawable(vscreen, (void 
*)pq->drawable); if (!tex) { pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_INVALID_HANDLE; } - dirty_area = vl_screen_get_dirty_area(pq->device->vscreen); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -267,12 +270,9 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true); } - vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time); - pipe->screen->flush_frontbuffer - ( - pipe->screen, tex, 0, 0, - vl_screen_get_private(pq->device->vscreen), NULL - ); + vscreen->set_next_timestamp(vscreen, earliest_presentation_time); + pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL); pipe->flush(pipe, &surf->fence, 0); diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am index 5051e8246e3..968778f995c 100644 --- a/src/gallium/state_trackers/xa/Makefile.am +++ b/src/gallium/state_trackers/xa/Makefile.am @@ -28,15 +28,6 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) -AM_CPPFLAGS = \ - $(GALLIUM_PIPE_LOADER_DEFINES) \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" - -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS += \ - -DGALLIUM_STATIC_TARGETS=1 -endif - xa_includedir = $(includedir) xa_include_HEADERS = \ xa_composite.h \ diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c index 4fdbdc96ae6..faa630c144b 100644 --- a/src/gallium/state_trackers/xa/xa_tracker.c +++ b/src/gallium/state_trackers/xa/xa_tracker.c @@ -152,21 +152,13 @@ xa_tracker_create(int drm_fd) struct xa_tracker *xa = calloc(1, sizeof(struct xa_tracker)); enum xa_surface_type stype; unsigned int num_formats; - int loader_fd; if (!xa) return NULL; -#if GALLIUM_STATIC_TARGETS - xa->screen = 
dd_create_screen(drm_fd); - (void) loader_fd; /* silence unused var warning */ -#else - loader_fd = dup(drm_fd); - if (loader_fd == -1) - return NULL; - if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd)) - xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR); -#endif + if (pipe_loader_drm_probe_fd(&xa->dev, dup(drm_fd))) + xa->screen = pipe_loader_create_screen(xa->dev); + if (!xa->screen) goto out_no_screen; @@ -214,10 +206,8 @@ xa_tracker_create(int drm_fd) out_no_pipe: xa->screen->destroy(xa->screen); out_no_screen: -#if !GALLIUM_STATIC_TARGETS if (xa->dev) pipe_loader_release(&xa->dev, 1); -#endif free(xa); return NULL; } @@ -228,9 +218,7 @@ xa_tracker_destroy(struct xa_tracker *xa) free(xa->supported_formats); xa_context_destroy(xa->default_ctx); xa->screen->destroy(xa->screen); -#if !GALLIUM_STATIC_TARGETS pipe_loader_release(&xa->dev, 1); -#endif free(xa); } diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c index 4702b44d1f4..a6991ab8d61 100644 --- a/src/gallium/state_trackers/xvmc/context.c +++ b/src/gallium/state_trackers/xvmc/context.c @@ -229,7 +229,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, return BadAlloc; /* TODO: Reuse screen if process creates another context */ - vscreen = vl_screen_create(dpy, scrn); + vscreen = vl_dri2_screen_create(dpy, scrn); if (!vscreen) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL screen.\n"); @@ -240,7 +240,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen, 0); if (!pipe) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n"); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -258,7 +258,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, if (!context_priv->decoder) { XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n"); 
pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -267,7 +267,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n"); context_priv->decoder->destroy(context_priv->decoder); pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -277,7 +277,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, vl_compositor_cleanup(&context_priv->compositor); context_priv->decoder->destroy(context_priv->decoder); pipe->destroy(pipe); - vl_screen_destroy(vscreen); + vscreen->destroy(vscreen); FREE(context_priv); return BadAlloc; } @@ -332,7 +332,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context) vl_compositor_cleanup_state(&context_priv->cstate); vl_compositor_cleanup(&context_priv->compositor); context_priv->pipe->destroy(context_priv->pipe); - vl_screen_destroy(context_priv->vscreen); + context_priv->vscreen->destroy(context_priv->vscreen); FREE(context_priv); context->privData = NULL; diff --git a/src/gallium/state_trackers/xvmc/surface.c b/src/gallium/state_trackers/xvmc/surface.c index 15eae59ff6e..199712ba168 100644 --- a/src/gallium/state_trackers/xvmc/surface.c +++ b/src/gallium/state_trackers/xvmc/surface.c @@ -355,6 +355,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, struct pipe_context *pipe; struct vl_compositor *compositor; struct vl_compositor_state *cstate; + struct vl_screen *vscreen; XvMCSurfacePrivate *surface_priv; XvMCContextPrivate *context_priv; @@ -386,9 +387,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, pipe = context_priv->pipe; compositor = &context_priv->compositor; cstate = &context_priv->cstate; + vscreen = context_priv->vscreen; - tex = vl_screen_texture_from_drawable(context_priv->vscreen, drawable); - dirty_area = 
vl_screen_get_dirty_area(context_priv->vscreen); + tex = vscreen->texture_from_drawable(vscreen, (void *)drawable); + dirty_area = vscreen->get_dirty_area(vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; @@ -444,11 +446,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface); - pipe->screen->flush_frontbuffer - ( - pipe->screen, tex, 0, 0, - vl_screen_get_private(context_priv->vscreen), NULL - ); + pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0, + vscreen->get_private(vscreen), NULL); if(dump_window == -1) { dump_window = debug_get_num_option("XVMC_DUMP", 0); diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am index b5221472ef0..d1d9829b6c5 100644 --- a/src/gallium/targets/d3dadapter9/Makefile.am +++ b/src/gallium/targets/d3dadapter9/Makefile.am @@ -34,19 +34,6 @@ AM_CFLAGS = \ $(GALLIUM_TARGET_CFLAGS) \ $(VISIBILITY_CFLAGS) -if HAVE_GALLIUM_STATIC_TARGETS -AM_CPPFLAGS = \ - -DNINE_TARGET \ - -DGALLIUM_STATIC_TARGETS=1 - -else - -AM_CPPFLAGS = \ - -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \ - $(GALLIUM_PIPE_LOADER_DEFINES) - -endif - ninedir = $(D3D_DRIVER_INSTALL_DIR) nine_LTLIBRARIES = d3dadapter9.la @@ -78,7 +65,6 @@ d3dadapter9_la_LIBADD = \ $(top_builddir)/src/glsl/libnir.la \ $(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \ $(top_builddir)/src/util/libmesautil.la \ - $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ $(EXPAT_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) @@ -87,7 +73,7 @@ EXTRA_DIST = d3dadapter9.sym TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -111,14 +97,16 @@ include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc if HAVE_GALLIUM_STATIC_TARGETS 
d3dadapter9_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS) -d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \ +d3dadapter9_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS d3dadapter9_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index fabc820f268..ad712db05eb 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* XXX: header order is slightly screwy here */ #include "loader.h" #include "adapter9.h" @@ -29,8 +30,7 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "target-helpers/inline_drm_helper.h" -#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/drm_helper.h" #include "state_tracker/drm_driver.h" #include "d3dadapter/d3dadapter9.h" @@ -91,53 +91,15 @@ drm_destroy( struct d3dadapter9_context *ctx ) else if (ctx->hal) ctx->hal->destroy(ctx->hal); -#if !GALLIUM_STATIC_TARGETS if (drm->swdev) pipe_loader_release(&drm->swdev, 1); if (drm->dev) pipe_loader_release(&drm->dev, 1); -#endif close(drm->fd); FREE(ctx); } -/* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is - * formatted. */ -static inline DWORD -read_file_dword( const char *name ) -{ - char buf[32]; - int fd, r; - - fd = open(name, O_RDONLY); - if (fd < 0) { - DBG("Unable to get PCI information from `%s'\n", name); - return 0; - } - - r = read(fd, buf, 32); - close(fd); - - return (r > 0) ? 
(DWORD)strtol(buf, NULL, 0) : 0; -} - -/* sysfs doesn't expose the revision as its own file, so this function grabs a - * dword at an offset in the raw PCI header. The reason this isn't used for all - * data is that the kernel will make corrections but not expose them in the raw - * header bytes. */ -static inline DWORD -read_config_dword( int fd, - unsigned offset ) -{ - DWORD r = 0; - - if (lseek(fd, offset, SEEK_SET) != offset) { return 0; } - if (read(fd, &r, 4) != 4) { return 0; } - - return r; -} - static inline void get_bus_info( int fd, DWORD *vendorid, @@ -215,26 +177,16 @@ drm_create_adapter( int fd, driOptionCache userInitOptions; int throttling_value_user = -2; -#if !GALLIUM_STATIC_TARGETS - const char *paths[] = { - getenv("D3D9_DRIVERS_PATH"), - getenv("D3D9_DRIVERS_DIR"), - PIPE_SEARCH_DIR - }; -#endif - if (!ctx) { return E_OUTOFMEMORY; } ctx->base.destroy = drm_destroy; + /* Although the fd is provided from external source, mesa/nine + * takes ownership of it. */ fd = loader_get_user_preferred_fd(fd, &different_device); ctx->fd = fd; ctx->base.linear_framebuffer = !!different_device; -#if GALLIUM_STATIC_TARGETS - ctx->base.hal = dd_create_screen(fd); -#else - /* use pipe-loader to dlopen appropriate drm driver */ if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) { ERR("Failed to probe drm fd %d.\n", fd); FREE(ctx); @@ -242,26 +194,15 @@ drm_create_adapter( int fd, return D3DERR_DRIVERINTERNALERROR; } - /* use pipe-loader to create a drm screen (hal) */ - ctx->base.hal = NULL; - for (i = 0; !ctx->base.hal && i < Elements(paths); ++i) { - if (!paths[i]) { continue; } - ctx->base.hal = pipe_loader_create_screen(ctx->dev, paths[i]); - } -#endif + ctx->base.hal = pipe_loader_create_screen(ctx->dev); if (!ctx->base.hal) { ERR("Unable to load requested driver.\n"); drm_destroy(&ctx->base); return D3DERR_DRIVERINTERNALERROR; } -#if GALLIUM_STATIC_TARGETS - dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD); - throttle_ret = dd_configuration(DRM_CONF_THROTTLE); 
-#else dmabuf_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_SHARE_FD); throttle_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_THROTTLE); -#endif // GALLIUM_STATIC_TARGETS if (!dmabuf_ret || !dmabuf_ret->val.val_bool) { ERR("The driver is not capable of dma-buf sharing." "Abandon to load nine state tracker\n"); @@ -308,18 +249,10 @@ drm_create_adapter( int fd, driDestroyOptionCache(&userInitOptions); driDestroyOptionInfo(&defaultInitOptions); -#if GALLIUM_STATIC_TARGETS - ctx->base.ref = ninesw_create_screen(ctx->base.hal); -#else /* wrap it to create a software screen that can share resources */ - if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) { - ctx->base.ref = NULL; - for (i = 0; !ctx->base.ref && i < Elements(paths); ++i) { - if (!paths[i]) { continue; } - ctx->base.ref = pipe_loader_create_screen(ctx->swdev, paths[i]); - } - } -#endif + if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) + ctx->base.ref = pipe_loader_create_screen(ctx->swdev); + if (!ctx->base.ref) { ERR("Couldn't wrap drm screen to swrast screen. 
Software devices " "will be unavailable.\n"); diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index a33d7f83671..2d9610ee9ab 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -35,7 +35,7 @@ endif LOCAL_SRC_FILES := target.c -LOCAL_CFLAGS := -DDRI_TARGET +LOCAL_CFLAGS := LOCAL_SHARED_LIBRARIES := \ libdl \ @@ -108,6 +108,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_dri_common \ libmesa_megadriver_stub \ libmesa_gallium \ + libmesa_pipe_loader \ libmesa_util \ libmesa_loader \ diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index 95efdd4451c..2666524fbfe 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -10,7 +10,6 @@ AM_CFLAGS = \ AM_CPPFLAGS = \ $(DEFINES) \ - -DDRI_TARGET \ -DGALLIUM_DDEBUG \ -DGALLIUM_NOOP \ -DGALLIUM_RBUG \ @@ -65,7 +64,7 @@ EXTRA_DIST = \ TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -92,14 +91,16 @@ if HAVE_GALLIUM_STATIC_TARGETS gallium_dri_la_SOURCES += target.c gallium_dri_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS) -gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \ +gallium_dri_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS gallium_dri_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript index 2fb0da09200..b4516598675 100644 --- a/src/gallium/targets/dri/SConscript +++ 
b/src/gallium/targets/dri/SConscript @@ -30,7 +30,6 @@ env.PkgUseModules('DRM') env.Append(CPPDEFINES = [ 'GALLIUM_VMWGFX', 'GALLIUM_SOFTPIPE', - 'DRI_TARGET', ]) env.Prepend(LIBS = [ @@ -39,6 +38,7 @@ env.Prepend(LIBS = [ svga, ws_dri, softpipe, + pipe_loader, libloader, mesautil, mesa, diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c index 32a11ef6281..d6fbd01b88f 100644 --- a/src/gallium/targets/dri/target.c +++ b/src/gallium/targets/dri/target.c @@ -1,2 +1,163 @@ -#include "target-helpers/inline_drm_helper.h" -#include "target-helpers/inline_sw_helper.h" +#include "target-helpers/drm_helper.h" + +#include "dri_screen.h" + +#if defined(GALLIUM_SOFTPIPE) + +const __DRIextension **__driDriverGetExtensions_swrast(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void) +{ + globalDriverAPI = &galliumsw_driver_api; + return galliumsw_driver_extensions; +} + +#if defined(HAVE_LIBDRM) + +const __DRIextension **__driDriverGetExtensions_kms_swrast(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void) +{ + globalDriverAPI = &dri_kms_driver_api; + return galliumdrm_driver_extensions; +} + +#endif +#endif + +#if defined(GALLIUM_I915) + +const __DRIextension **__driDriverGetExtensions_i915(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_ILO) + +const __DRIextension **__driDriverGetExtensions_i965(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_NOUVEAU) + +const __DRIextension **__driDriverGetExtensions_nouveau(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if 
defined(GALLIUM_R300) + +const __DRIextension **__driDriverGetExtensions_r300(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_R600) + +const __DRIextension **__driDriverGetExtensions_r600(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_RADEONSI) + +const __DRIextension **__driDriverGetExtensions_radeonsi(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VMWGFX) + +const __DRIextension **__driDriverGetExtensions_vmwgfx(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_FREEDRENO) + +const __DRIextension **__driDriverGetExtensions_msm(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} + +const __DRIextension **__driDriverGetExtensions_kgsl(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VIRGL) + +const __DRIextension **__driDriverGetExtensions_virtio_gpu(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif + +#if defined(GALLIUM_VC4) + +const __DRIextension **__driDriverGetExtensions_vc4(void); + +PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void) +{ + globalDriverAPI = 
&galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} + +#if defined(USE_VC4_SIMULATOR) +const __DRIextension **__driDriverGetExtensions_i965(void); + +/** + * When building using the simulator (on x86), we advertise ourselves as the + * i965 driver so that you can just make a directory with a link from + * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that + * on your i965-using host to run the driver under simulation. + * + * This is, of course, incompatible with building with the ilo driver, but you + * shouldn't be building that anyway. + */ +PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) +{ + globalDriverAPI = &galliumdrm_driver_api; + return galliumdrm_driver_extensions; +} +#endif +#endif diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am index a4dff487dd8..3bdb9eb7e61 100644 --- a/src/gallium/targets/omx/Makefile.am +++ b/src/gallium/targets/omx/Makefile.am @@ -40,7 +40,7 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -50,14 +50,16 @@ include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc libomx_mesa_la_SOURCES += target.c libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \ +libomx_mesa_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libomx_mesa_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/omx/target.c b/src/gallium/targets/omx/target.c index 
fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/omx/target.c +++ b/src/gallium/targets/omx/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am index c78b26832ff..3cb29766724 100644 --- a/src/gallium/targets/opencl/Makefile.am +++ b/src/gallium/targets/opencl/Makefile.am @@ -15,11 +15,10 @@ lib@OPENCL_LIBNAME@_la_LDFLAGS += \ endif lib@OPENCL_LIBNAME@_la_LIBADD = \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \ $(top_builddir)/src/gallium/state_trackers/clover/libclover.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ $(ELF_LIB) \ -ldl \ -lclangCodeGen \ diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am index 4f25b4f6073..4bc3b55f26b 100644 --- a/src/gallium/targets/pipe-loader/Makefile.am +++ b/src/gallium/targets/pipe-loader/Makefile.am @@ -27,6 +27,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/gallium/drivers \ -I$(top_srcdir)/src/gallium/winsys \ + $(GALLIUM_PIPE_LOADER_DEFINES) \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ -DGALLIUM_RBUG \ @@ -208,6 +209,10 @@ AM_CPPFLAGS += -DGALLIUM_LLVMPIPE pipe_swrast_la_LIBADD += \ $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la endif + +pipe_swrast_la_LIBADD += \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + endif EXTRA_DIST = pipe.sym diff --git a/src/gallium/targets/pipe-loader/pipe.sym b/src/gallium/targets/pipe-loader/pipe.sym index 19b1d77b040..b2fa619f7de 100644 --- a/src/gallium/targets/pipe-loader/pipe.sym +++ b/src/gallium/targets/pipe-loader/pipe.sym @@ -1,7 +1,7 @@ { global: driver_descriptor; - swrast_create_screen; + swrast_driver_descriptor; local: *; }; diff --git 
a/src/gallium/targets/pipe-loader/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c index f7f354acf3f..cf617f37e20 100644 --- a/src/gallium/targets/pipe-loader/pipe_swrast.c +++ b/src/gallium/targets/pipe-loader/pipe_swrast.c @@ -1,7 +1,11 @@ #include "target-helpers/inline_sw_helper.h" #include "target-helpers/inline_debug_helper.h" -#include "state_tracker/drm_driver.h" +#include "state_tracker/sw_driver.h" +#include "sw/dri/dri_sw_winsys.h" +#include "sw/kms-dri/kms_dri_sw_winsys.h" +#include "sw/null/null_sw_winsys.h" +#include "sw/wrapper/wrapper_sw_winsys.h" PUBLIC struct pipe_screen * swrast_create_screen(struct sw_winsys *ws); @@ -17,3 +21,31 @@ swrast_create_screen(struct sw_winsys *ws) return screen; } + +PUBLIC +struct sw_driver_descriptor swrast_driver_descriptor = { + .create_screen = swrast_create_screen, + .winsys = { +#ifdef HAVE_PIPE_LOADER_DRI + { + .name = "dri", + .create_winsys = dri_create_sw_winsys, + }, +#endif +#ifdef HAVE_PIPE_LOADER_KMS + { + .name = "kms_dri", + .create_winsys = kms_dri_create_winsys, + }, +#endif + { + .name = "null", + .create_winsys = null_sw_create, + }, + { + .name = "wrapped", + .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + }, + { 0 }, + } +}; diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am index 9613f041b58..733e7acb455 100644 --- a/src/gallium/targets/va/Makefile.am +++ b/src/gallium/targets/va/Makefile.am @@ -40,21 +40,23 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc gallium_drv_video_la_SOURCES += target.c gallium_drv_video_la_CPPFLAGS = $(TARGET_CPPFLAGS) -gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \ +gallium_drv_video_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + 
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS gallium_drv_video_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/va/target.c b/src/gallium/targets/va/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/va/target.c +++ b/src/gallium/targets/va/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am index 7eb62c1cc78..d388f8b5014 100644 --- a/src/gallium/targets/vdpau/Makefile.am +++ b/src/gallium/targets/vdpau/Makefile.am @@ -47,7 +47,7 @@ EXTRA_DIST = \ TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -59,14 +59,16 @@ if HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_SOURCES += target.c libvdpau_gallium_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \ +libvdpau_gallium_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/vdpau/target.c b/src/gallium/targets/vdpau/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/vdpau/target.c +++ 
b/src/gallium/targets/vdpau/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am index 02c42c665ed..a63fd6903a4 100644 --- a/src/gallium/targets/xa/Makefile.am +++ b/src/gallium/targets/xa/Makefile.am @@ -60,7 +60,7 @@ if HAVE_GALLIUM_STATIC_TARGETS TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc @@ -74,13 +74,15 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc libxatracker_la_SOURCES += target.c libxatracker_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libxatracker_la_LIBADD += $(TARGET_LIB_DEPS) +libxatracker_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) else # HAVE_GALLIUM_STATIC_TARGETS libxatracker_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/xa/target.c b/src/gallium/targets/xa/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/xa/target.c +++ b/src/gallium/targets/xa/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am index b3285890822..fdc5f4b7318 100644 --- a/src/gallium/targets/xvmc/Makefile.am +++ b/src/gallium/targets/xvmc/Makefile.am @@ -38,7 +38,7 @@ EXTRA_DIST = xvmc.sym TARGET_DRIVERS = TARGET_CPPFLAGS = -TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la +TARGET_LIB_DEPS = include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc @@ -48,13 +48,15 @@ if 
HAVE_GALLIUM_STATIC_TARGETS libXvMCgallium_la_SOURCES += target.c libXvMCgallium_la_CPPFLAGS = $(TARGET_CPPFLAGS) -libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \ +libXvMCgallium_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) else # HAVE_GALLIUM_STATIC_TARGETS libXvMCgallium_la_LIBADD += \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la endif # HAVE_GALLIUM_STATIC_TARGETS diff --git a/src/gallium/targets/xvmc/target.c b/src/gallium/targets/xvmc/target.c index fde4a4a7dcf..42b1346d341 100644 --- a/src/gallium/targets/xvmc/target.c +++ b/src/gallium/targets/xvmc/target.c @@ -1 +1 @@ -#include "target-helpers/inline_drm_helper.h" +#include "target-helpers/drm_helper.h" diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am index 56b7f3ffc66..585fb699e6c 100644 --- a/src/gallium/tests/trivial/Makefile.am +++ b/src/gallium/tests/trivial/Makefile.am @@ -5,17 +5,10 @@ PIPE_SRC_DIR = $(top_builddir)/src/gallium/targets/pipe-loader AM_CFLAGS = \ $(GALLIUM_CFLAGS) -AM_CPPFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - -DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\" \ - $(GALLIUM_PIPE_LOADER_DEFINES) - LDADD = \ - $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ - $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) noinst_PROGRAMS = compute tri quad-tex diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index b344f78b25c..bcdfb11c4f1 100644 --- 
a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -74,7 +74,7 @@ static void init_ctx(struct context *ctx) ret = pipe_loader_probe(&ctx->dev, 1); assert(ret); - ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); + ctx->screen = pipe_loader_create_screen(ctx->dev); assert(ctx->screen); ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0); diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index f66f63043da..4c5a9200a52 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -96,7 +96,7 @@ static void init_prog(struct program *p) assert(ret); /* init a pipe screen */ - p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + p->screen = pipe_loader_create_screen(p->dev); assert(p->screen); /* create the pipe driver context and cso context */ diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index a555200842e..c71a63f44e5 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -91,7 +91,7 @@ static void init_prog(struct program *p) assert(ret); /* init a pipe screen */ - p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); + p->screen = pipe_loader_create_screen(p->dev); assert(p->screen); /* create the pipe driver context and cso context */ diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk index 6898fb0d492..59cc8577a6e 100644 --- a/src/glsl/Android.gen.mk +++ b/src/glsl/Android.gen.mk @@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \ $(MESA_TOP)/src/glsl/nir LOCAL_EXPORT_C_INCLUDE_DIRS += \ - $(intermediates)/nir + $(intermediates)/nir \ + $(MESA_TOP)/src/glsl/nir LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(LIBGLCPP_GENERATED_FILES) \ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 957fd6b90ba..0c9fd75d206 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -22,10 +22,12 @@ NIR_FILES = \ 
nir/glsl_to_nir.h \ nir/glsl_types.cpp \ nir/glsl_types.h \ + nir/builtin_type_macros.h \ nir/nir.c \ nir/nir.h \ nir/nir_array.h \ nir/nir_builder.h \ + nir/nir_clone.c \ nir/nir_constant_expressions.h \ nir/nir_control_flow.c \ nir/nir_control_flow.h \ @@ -102,7 +104,6 @@ LIBGLSL_FILES = \ blob.c \ blob.h \ builtin_functions.cpp \ - builtin_type_macros.h \ builtin_types.cpp \ builtin_variables.cpp \ glsl_parser_extras.cpp \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 1b75234d578..3bea63ea0ed 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -336,7 +336,7 @@ public: array_dimensions.push_tail(&dim->link); } - const bool is_single_dimension() + bool is_single_dimension() const { return this->array_dimensions.tail_pred->prev != NULL && this->array_dimensions.tail_pred->prev->is_head_sentinel(); @@ -350,6 +350,26 @@ public: exec_list array_dimensions; }; +class ast_layout_expression : public ast_node { +public: + ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) + { + set_location(locp); + layout_const_expressions.push_tail(&expr->link); + } + + bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, bool can_be_zero); + + void merge_qualifier(ast_layout_expression *l_expr) + { + layout_const_expressions.append_list(&l_expr->layout_const_expressions); + } + + exec_list layout_const_expressions; +}; + /** * C-style aggregate initialization class * @@ -558,7 +578,7 @@ struct ast_type_qualifier { unsigned precision:2; /** Geometry shader invocations for GL_ARB_gpu_shader5. */ - int invocations; + ast_layout_expression *invocations; /** * Location specified via GL_ARB_explicit_attrib_location layout @@ -566,20 +586,20 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_location is set. 
*/ - int location; + ast_expression *location; /** * Index specified via GL_ARB_explicit_attrib_location layout * * \note * This field is only valid if \c explicit_index is set. */ - int index; + ast_expression *index; /** Maximum output vertices in GLSL 1.50 geometry shaders. */ - int max_vertices; + ast_layout_expression *max_vertices; /** Stream in GLSL 1.50 geometry shaders. */ - unsigned stream; + ast_expression *stream; /** * Input or output primitive type in GLSL 1.50 geometry shaders @@ -593,7 +613,7 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_binding is set. */ - int binding; + ast_expression *binding; /** * Offset specified via GL_ARB_shader_atomic_counter's "offset" @@ -602,14 +622,14 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_offset is set. */ - int offset; + ast_expression *offset; /** * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" * layout qualifier. Element i of this array is only valid if * flags.q.local_size & (1 << i) is set. 
*/ - int local_size[3]; + ast_layout_expression *local_size[3]; /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ GLenum vertex_spacing; @@ -621,7 +641,7 @@ struct ast_type_qualifier { bool point_mode; /** Tessellation control shader: number of output vertices */ - int vertices; + ast_layout_expression *vertices; /** * Image format specified with an ARB_shader_image_load_store @@ -752,7 +772,7 @@ public: class ast_fully_specified_type : public ast_node { public: virtual void print(void) const; - bool has_qualifiers() const; + bool has_qualifiers(_mesa_glsl_parse_state *state) const; ast_fully_specified_type() : qualifier(), specifier(NULL) { @@ -1093,17 +1113,13 @@ public: class ast_tcs_output_layout : public ast_node { public: - ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices) - : vertices(vertices) + ast_tcs_output_layout(const struct YYLTYPE &locp) { set_location(locp); } virtual ir_rvalue *hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); - -private: - const int vertices; }; @@ -1135,9 +1151,12 @@ private: class ast_cs_input_layout : public ast_node { public: - ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size) + ast_cs_input_layout(const struct YYLTYPE &locp, + ast_layout_expression **local_size) { - memcpy(this->local_size, local_size, sizeof(this->local_size)); + for (int i = 0; i < 3; i++) { + this->local_size[i] = local_size[i]; + } set_location(locp); } @@ -1145,7 +1164,7 @@ public: struct _mesa_glsl_parse_state *state); private: - unsigned local_size[3]; + ast_layout_expression *local_size[3]; }; /*@}*/ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 65db2618895..52881a4da7a 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2491,7 +2491,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major may not be applied to variables " "outside of 
uniform blocks"); - } else if (!type->is_matrix()) { + } else if (!type->without_array()->is_matrix()) { /* The OpenGL ES 3.0 conformance tests did not originally allow * matrix layout qualifiers on non-matrices. However, the OpenGL * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were @@ -2502,39 +2502,88 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major applied to non-matrix types may " "be rejected by older compilers"); - } else if (type->is_record()) { - /* We allow 'layout(row_major)' on structure types because it's the only - * way to get row-major layouts on matrices contained in structures. - */ - _mesa_glsl_warning(loc, state, - "uniform block layout qualifiers row_major and " - "column_major applied to structure types is not " - "strictly conformant and may be rejected by other " - "compilers"); } } static bool -validate_binding_qualifier(struct _mesa_glsl_parse_state *state, +process_qualifier_constant(struct _mesa_glsl_parse_state *state, YYLTYPE *loc, - const glsl_type *type, - const ast_type_qualifier *qual) + const char *qual_indentifier, + ast_expression *const_expression, + unsigned *value) +{ + exec_list dummy_instructions; + + if (const_expression == NULL) { + *value = 0; + return true; + } + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + _mesa_glsl_error(loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < 0) { + _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)", + qual_indentifier, const_int->value.u[0]); + return false; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. 
If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; +} + +static bool +validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) +{ + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; +} + +static void +apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) { if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniforms and " "shader storage buffer objects"); - return false; + return; } - if (qual->binding < 0) { - _mesa_glsl_error(loc, state, "binding values must be >= 0"); - return false; + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; } const struct gl_context *const ctx = state->ctx; unsigned elements = type->is_array() ? 
type->arrays_of_arrays_size() : 1; - unsigned max_index = qual->binding + elements - 1; + unsigned max_index = qual_binding + elements - 1; const glsl_type *base_type = type->without_array(); if (base_type->is_interface()) { @@ -2550,11 +2599,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.uniform && max_index >= ctx->Const.MaxUniformBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " "the maximum number of UBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxUniformBufferBindings); - return false; + return; } /* SSBOs. From page 67 of the GLSL 4.30 specification: @@ -2568,11 +2617,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.buffer && max_index >= ctx->Const.MaxShaderStorageBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " "the maximum number of SSBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxShaderStorageBufferBindings); - return false; + return; } } else if (base_type->is_sampler()) { /* Samplers. 
From page 63 of the GLSL 4.20 specification: @@ -2587,19 +2636,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, if (max_index >= limit) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " "exceeds the maximum number of texture image units " - "(%d)", qual->binding, elements, limit); + "(%u)", qual_binding, elements, limit); - return false; + return; } } else if (base_type->contains_atomic()) { assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); - if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) { + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " " maximum number of atomic counter buffer bindings" - "(%d)", qual->binding, + "(%u)", qual_binding, ctx->Const.MaxAtomicBufferBindings); - return false; + return; } } else if (state->is_version(420, 310) && base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); @@ -2607,17 +2656,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(loc, state, "Image binding %d exceeds the " " maximum number of image units (%d)", max_index, ctx->Const.MaxImageUnits); - return false; + return; } } else { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniform " "blocks, opaque variables, or arrays thereof"); - return false; + return; } - return true; + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; } @@ -2660,20 +2712,26 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, static void -validate_explicit_location(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) +apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { bool fail = false; + unsigned qual_location; + if (!process_qualifier_constant(state, 
loc, "location", qual->location, + &qual_location)) { + return; + } + /* Checks for GL_ARB_explicit_uniform_location. */ if (qual->flags.q.uniform) { if (!state->check_explicit_uniform_location_allowed(loc, var)) return; const struct gl_context *const ctx = state->ctx; - unsigned max_loc = qual->location + var->type->uniform_locations() - 1; + unsigned max_loc = qual_location + var->type->uniform_locations() - 1; if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " @@ -2683,7 +2741,7 @@ validate_explicit_location(const struct ast_type_qualifier *qual, } var->data.explicit_location = true; - var->data.location = qual->location; + var->data.location = qual_location; return; } @@ -2768,30 +2826,40 @@ validate_explicit_location(const struct ast_type_qualifier *qual, switch (state->stage) { case MESA_SHADER_VERTEX: var->data.location = (var->data.mode == ir_var_shader_in) - ? (qual->location + VERT_ATTRIB_GENERIC0) - : (qual->location + VARYING_SLOT_VAR0); + ? (qual_location + VERT_ATTRIB_GENERIC0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: if (var->data.patch) - var->data.location = qual->location + VARYING_SLOT_PATCH0; + var->data.location = qual_location + VARYING_SLOT_PATCH0; else - var->data.location = qual->location + VARYING_SLOT_VAR0; + var->data.location = qual_location + VARYING_SLOT_VAR0; break; case MESA_SHADER_FRAGMENT: var->data.location = (var->data.mode == ir_var_shader_out) - ? (qual->location + FRAG_RESULT_DATA0) - : (qual->location + VARYING_SLOT_VAR0); + ? 
(qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_COMPUTE: assert(!"Unexpected shader type"); break; } - if (qual->flags.q.explicit_index) { + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { /* From the GLSL 4.30 specification, section 4.4.2 (Output * Layout Qualifiers): * @@ -2801,12 +2869,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual, * Older specifications don't mandate a behavior; we take * this as a clarification and always generate the error. */ - if (qual->index < 0 || qual->index > 1) { + if (qual_index > 1) { _mesa_glsl_error(loc, state, "explicit index may only be 0 or 1"); } else { var->data.explicit_index = true; - var->data.index = qual->index; + var->data.index = qual_index; } } } @@ -2939,6 +3007,221 @@ validate_array_dimensions(const glsl_type *t, } static void +apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { + + /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: + * + * "Within any shader, the first redeclarations of gl_FragCoord + * must appear before any use of gl_FragCoord." + * + * Generate a compiler error if above condition is not met by the + * fragment shader. 
+ */ + ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); + if (earlier != NULL && + earlier->data.used && + !state->fs_redeclares_gl_fragcoord) { + _mesa_glsl_error(loc, state, + "gl_FragCoord used before its first redeclaration " + "in fragment shader"); + } + + /* Make sure all gl_FragCoord redeclarations specify the same layout + * qualifiers. + */ + if (is_conflicting_fragcoord_redeclaration(state, qual)) { + const char *const qual_string = + get_layout_qualifier_string(qual->flags.q.origin_upper_left, + qual->flags.q.pixel_center_integer); + + const char *const state_string = + get_layout_qualifier_string(state->fs_origin_upper_left, + state->fs_pixel_center_integer); + + _mesa_glsl_error(loc, state, + "gl_FragCoord redeclared with different layout " + "qualifiers (%s) and (%s) ", + state_string, + qual_string); + } + state->fs_origin_upper_left = qual->flags.q.origin_upper_left; + state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = + !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord = + state->fs_origin_upper_left || + state->fs_pixel_center_integer || + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + } + + var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; + var->data.origin_upper_left = qual->flags.q.origin_upper_left; + if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) + && (strcmp(var->name, "gl_FragCoord") != 0)) { + const char *const qual_string = (qual->flags.q.origin_upper_left) + ? 
"origin_upper_left" : "pixel_center_integer"; + + _mesa_glsl_error(loc, state, + "layout qualifier `%s' can only be applied to " + "fragment shader input `gl_FragCoord'", + qual_string); + } + + if (qual->flags.q.explicit_location) { + apply_explicit_location(qual, var, state, loc); + } else if (qual->flags.q.explicit_index) { + if (!qual->flags.q.subroutine_def) + _mesa_glsl_error(loc, state, + "explicit index requires explicit location"); + } + + if (qual->flags.q.explicit_binding) { + apply_explicit_binding(state, loc, var, var->type, qual); + } + + if (state->stage == MESA_SHADER_GEOMETRY && + qual->flags.q.out && qual->flags.q.stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, loc, "stream", qual->stream, + &qual_stream) && + validate_stream_qualifier(loc, state, qual_stream)) { + var->data.stream = qual_stream; + } + } + + if (var->type->contains_atomic()) { + if (var->data.mode == ir_var_uniform) { + if (var->data.explicit_binding) { + unsigned *offset = + &state->atomic_counter_offsets[var->data.binding]; + + if (*offset % ATOMIC_COUNTER_SIZE) + _mesa_glsl_error(loc, state, + "misaligned atomic counter offset"); + + var->data.atomic.offset = *offset; + *offset += var->type->atomic_size(); + + } else { + _mesa_glsl_error(loc, state, + "atomic counters require explicit binding point"); + } + } else if (var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "atomic counters may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. + * Many implementations of GL_ARB_fragment_coord_conventions_enable and some + * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable + * allowed the layout qualifier to be used with 'varying' and 'attribute'. + * These extensions and all following extensions that add the 'layout' + * keyword have been modified to require the use of 'in' or 'out'. 
+ * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. 
+ */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + 
_mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } +} + +static void apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, ir_variable *var, struct _mesa_glsl_parse_state *state, @@ -2992,11 +3275,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, select_gles_precision(qual->precision, var->type, state, loc); } - if (state->stage == MESA_SHADER_GEOMETRY && - qual->flags.q.out && qual->flags.q.stream) { - var->data.stream = qual->stream; - } - if (qual->flags.q.patch) var->data.patch = 1; @@ -3136,102 +3414,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, state, loc); - var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; - var->data.origin_upper_left = qual->flags.q.origin_upper_left; - if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) - && (strcmp(var->name, "gl_FragCoord") != 0)) { - const char *const qual_string = (qual->flags.q.origin_upper_left) - ? "origin_upper_left" : "pixel_center_integer"; - - _mesa_glsl_error(loc, state, - "layout qualifier `%s' can only be applied to " - "fragment shader input `gl_FragCoord'", - qual_string); - } - - if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { - - /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: - * - * "Within any shader, the first redeclarations of gl_FragCoord - * must appear before any use of gl_FragCoord." - * - * Generate a compiler error if above condition is not met by the - * fragment shader. 
- */ - ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); - if (earlier != NULL && - earlier->data.used && - !state->fs_redeclares_gl_fragcoord) { - _mesa_glsl_error(loc, state, - "gl_FragCoord used before its first redeclaration " - "in fragment shader"); - } - - /* Make sure all gl_FragCoord redeclarations specify the same layout - * qualifiers. - */ - if (is_conflicting_fragcoord_redeclaration(state, qual)) { - const char *const qual_string = - get_layout_qualifier_string(qual->flags.q.origin_upper_left, - qual->flags.q.pixel_center_integer); - - const char *const state_string = - get_layout_qualifier_string(state->fs_origin_upper_left, - state->fs_pixel_center_integer); - - _mesa_glsl_error(loc, state, - "gl_FragCoord redeclared with different layout " - "qualifiers (%s) and (%s) ", - state_string, - qual_string); - } - state->fs_origin_upper_left = qual->flags.q.origin_upper_left; - state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = - !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord = - state->fs_origin_upper_left || - state->fs_pixel_center_integer || - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; - } - - if (qual->flags.q.explicit_location) { - validate_explicit_location(qual, var, state, loc); - } else if (qual->flags.q.explicit_index) { - _mesa_glsl_error(loc, state, "explicit index requires explicit location"); - } - - if (qual->flags.q.explicit_binding && - validate_binding_qualifier(state, loc, var->type, qual)) { - var->data.explicit_binding = true; - var->data.binding = qual->binding; - } - - if (var->type->contains_atomic()) { - if (var->data.mode == ir_var_uniform) { - if (var->data.explicit_binding) { - unsigned *offset = - &state->atomic_counter_offsets[var->data.binding]; - - if (*offset % ATOMIC_COUNTER_SIZE) - _mesa_glsl_error(loc, state, - "misaligned atomic counter 
offset"); - - var->data.atomic.offset = *offset; - *offset += var->type->atomic_size(); - - } else { - _mesa_glsl_error(loc, state, - "atomic counters require explicit binding point"); - } - } else if (var->data.mode != ir_var_function_in) { - _mesa_glsl_error(loc, state, "atomic counters may only be declared as " - "function parameters or uniform-qualified " - "global variables"); - } - } - /* Does the declaration use the deprecated 'attribute' or 'varying' * keywords? */ @@ -3267,114 +3449,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, "`out' or `varying' variables between shader stages"); } - - /* Is the 'layout' keyword used with parameters that allow relaxed checking. - * Many implementations of GL_ARB_fragment_coord_conventions_enable and some - * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable - * allowed the layout qualifier to be used with 'varying' and 'attribute'. - * These extensions and all following extensions that add the 'layout' - * keyword have been modified to require the use of 'in' or 'out'. - * - * The following extension do not allow the deprecated keywords: - * - * GL_AMD_conservative_depth - * GL_ARB_conservative_depth - * GL_ARB_gpu_shader5 - * GL_ARB_separate_shader_objects - * GL_ARB_tessellation_shader - * GL_ARB_transform_feedback3 - * GL_ARB_uniform_buffer_object - * - * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 - * allow layout with the deprecated keywords. 
- */ - const bool relaxed_layout_qualifier_checking = - state->ARB_fragment_coord_conventions_enable; - - if (qual->has_layout() && uses_deprecated_qualifier) { - if (relaxed_layout_qualifier_checking) { - _mesa_glsl_warning(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } else { - _mesa_glsl_error(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } - } - - /* Layout qualifiers for gl_FragDepth, which are enabled by extension - * AMD_conservative_depth. - */ - int depth_layout_count = qual->flags.q.depth_any - + qual->flags.q.depth_greater - + qual->flags.q.depth_less - + qual->flags.q.depth_unchanged; - if (depth_layout_count > 0 - && !state->AMD_conservative_depth_enable - && !state->ARB_conservative_depth_enable) { - _mesa_glsl_error(loc, state, - "extension GL_AMD_conservative_depth or " - "GL_ARB_conservative_depth must be enabled " - "to use depth layout qualifiers"); - } else if (depth_layout_count > 0 - && strcmp(var->name, "gl_FragDepth") != 0) { - _mesa_glsl_error(loc, state, - "depth layout qualifiers can be applied only to " - "gl_FragDepth"); - } else if (depth_layout_count > 1 - && strcmp(var->name, "gl_FragDepth") == 0) { - _mesa_glsl_error(loc, state, - "at most one depth layout qualifier can be applied to " - "gl_FragDepth"); - } - if (qual->flags.q.depth_any) - var->data.depth_layout = ir_depth_layout_any; - else if (qual->flags.q.depth_greater) - var->data.depth_layout = ir_depth_layout_greater; - else if (qual->flags.q.depth_less) - var->data.depth_layout = ir_depth_layout_less; - else if (qual->flags.q.depth_unchanged) - var->data.depth_layout = ir_depth_layout_unchanged; - else - var->data.depth_layout = ir_depth_layout_none; - - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { - _mesa_glsl_error(loc, state, - "uniform and shader storage block layout qualifiers " - "std140, std430, packed, and shared can 
only be " - "applied to uniform or shader storage blocks, not " - "members"); - } - if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { _mesa_glsl_error(loc, state, "the shared storage qualifiers can only be used with " "compute shaders"); } - if (qual->flags.q.row_major || qual->flags.q.column_major) { - validate_matrix_layout_for_type(state, loc, var->type, var); - } - apply_image_qualifier_to_variable(qual, var, state, loc); - - /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader - * Inputs): - * - * "Fragment shaders also allow the following layout qualifier on in only - * (not with variable declarations) - * layout-qualifier-id - * early_fragment_tests - * [...]" - */ - if (qual->flags.q.early_fragment_tests) { - _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " - "valid in fragment shader input layout declaration."); - } } /** @@ -3798,7 +3879,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, unsigned num_vertices = 0; if (state->tcs_output_vertices_specified) { - num_vertices = state->out_qualifier->vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } } if (!var->type->is_array() && !var->data.patch) { @@ -4032,9 +4123,18 @@ ast_declarator_list::hir(exec_list *instructions, */ if (decl_type && decl_type->contains_atomic()) { if (type->qualifier.flags.q.explicit_binding && - type->qualifier.flags.q.explicit_offset) - state->atomic_counter_offsets[type->qualifier.binding] = - type->qualifier.offset; + type->qualifier.flags.q.explicit_offset) { + unsigned qual_binding; + unsigned qual_offset; + if (process_qualifier_constant(state, &loc, "binding", + type->qualifier.binding, + &qual_binding) + && 
process_qualifier_constant(state, &loc, "offset", + type->qualifier.offset, + &qual_offset)) { + state->atomic_counter_offsets[qual_binding] = qual_offset; + } + } } if (this->declarations.is_empty()) { @@ -4188,6 +4288,8 @@ ast_declarator_list::hir(exec_list *instructions, apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); if (this->type->qualifier.flags.q.invariant) { if (!is_varying_var(var, state->stage)) { @@ -4983,7 +5085,7 @@ ast_function::hir(exec_list *instructions, /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: * "No qualifier is allowed on the return type of a function." */ - if (this->return_type->has_qualifiers()) { + if (this->return_type->has_qualifiers(state)) { YYLTYPE loc = this->get_location(); _mesa_glsl_error(& loc, state, "function `%s' return type has qualifiers", name); @@ -5115,6 +5217,27 @@ ast_function::hir(exec_list *instructions, if (this->return_type->qualifier.flags.q.subroutine_def) { int idx; + if (this->return_type->qualifier.flags.q.explicit_index) { + unsigned qual_index; + if (process_qualifier_constant(state, &loc, "index", + this->return_type->qualifier.index, + &qual_index)) { + if (!state->has_explicit_uniform_location()) { + _mesa_glsl_error(&loc, state, "subroutine index requires " + "GL_ARB_explicit_uniform_location or " + "GLSL 4.30"); + } else if (qual_index >= MAX_SUBROUTINES) { + _mesa_glsl_error(&loc, state, + "invalid subroutine index (%d) index must " + "be a number between 0 and " + "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, + MAX_SUBROUTINES - 1); + } else { + f->subroutine_index = qual_index; + } + } + } + f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); f->subroutine_types = ralloc_array(state, const struct glsl_type *, f->num_subroutine_types); @@ -6046,27 +6169,19 @@ ast_type_specifier::hir(exec_list *instructions, * stored in \c 
*fields_ret. */ unsigned -ast_process_structure_or_interface_block(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - exec_list *declarations, - YYLTYPE &loc, - glsl_struct_field **fields_ret, - bool is_interface, - enum glsl_matrix_layout matrix_layout, - bool allow_reserved_names, - ir_variable_mode var_mode, - ast_type_qualifier *layout) +ast_process_struct_or_iface_block_members(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + glsl_struct_field **fields_ret, + bool is_interface, + enum glsl_matrix_layout matrix_layout, + bool allow_reserved_names, + ir_variable_mode var_mode, + ast_type_qualifier *layout, + unsigned block_stream) { unsigned decl_count = 0; - /* For blocks that accept memory qualifiers (i.e. shader storage), verify - * that we don't have incompatible qualifiers - */ - if (layout && layout->flags.q.read_only && layout->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "Interface block sets both readonly and writeonly"); - } - /* Make an initial pass over the list of fields to determine how * many there are. Each element in this list is an ast_declarator_list. 
* This means that we actually need to count the number of elements in the @@ -6087,6 +6202,7 @@ ast_process_structure_or_interface_block(exec_list *instructions, unsigned i = 0; foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { const char *type_name; + YYLTYPE loc = decl_list->get_location(); decl_list->type->specifier->hir(instructions, state); @@ -6101,74 +6217,120 @@ ast_process_structure_or_interface_block(exec_list *instructions, const glsl_type *decl_type = decl_list->type->glsl_type(& type_name, state); - foreach_list_typed (ast_declaration, decl, link, - &decl_list->declarations) { - if (!allow_reserved_names) - validate_identifier(decl->identifier, loc, state); + const struct ast_type_qualifier *const qual = + &decl_list->type->qualifier; - /* From section 4.3.9 of the GLSL 4.40 spec: - * - * "[In interface blocks] opaque types are not allowed." + /* From section 4.3.9 of the GLSL 4.40 spec: + * + * "[In interface blocks] opaque types are not allowed." + * + * It should be impossible for decl_type to be NULL here. Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: * - * It should be impossible for decl_type to be NULL here. Cases that - * might naturally lead to decl_type being NULL, especially for the - * is_interface case, will have resulted in compilation having - * already halted due to a syntax error. + * "Members of structures cannot be declared as atomic counter + * types." 
*/ - assert(decl_type); + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } - if (is_interface && decl_type->contains_opaque()) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "uniform/buffer in non-default interface block contains " - "opaque variable"); - } + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. + */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } - if (decl_type->contains_atomic()) { - /* From section 4.1.7.3 of the GLSL 4.40 spec: - * - * "Members of structures cannot be declared as atomic counter - * types." - */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, "atomic counter in structure, " - "shader storage block or uniform block"); - } + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } - if (decl_type->contains_image()) { - /* FINISHME: Same problem as with atomic counters. - * FINISHME: Request clarification from Khronos and add - * FINISHME: spec quotation here. 
- */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "image in structure, shader storage block or " - "uniform block"); + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." + */ + if (qual->flags.q.explicit_stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, &loc, "stream", + qual->stream, &qual_stream) && + qual_stream != block_stream) { + _mesa_glsl_error(&loc, state, "stream layout qualifier on " + "interface block member does not match " + "the interface block (%d vs %d)", qual->stream, + block_stream); } + } - const struct ast_type_qualifier *const qual = - & decl_list->type->qualifier; + if (qual->flags.q.uniform && qual->has_interpolation()) { + _mesa_glsl_error(&loc, state, + "interpolation qualifiers cannot be used " + "with uniform interface blocks"); + } - if (qual->flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, decl_type, qual); + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!qual->flags.q.uniform && 
!qual->flags.q.buffer) { _mesa_glsl_error(&loc, state, - "uniform/shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform/shader storage blocks, not " - "members"); - } + "row_major and column_major can only be " + "applied to interface blocks"); + } else + validate_matrix_layout_for_type(state, &loc, decl_type, NULL); + } - if (qual->flags.q.constant) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "const storage qualifier cannot be applied " - "to struct or interface block members"); - } + if (qual->flags.q.read_only && qual->flags.q.write_only) { + _mesa_glsl_error(&loc, state, "buffer variable can't be both " + "readonly and writeonly."); + } + + foreach_list_typed (ast_declaration, decl, link, + &decl_list->declarations) { + YYLTYPE loc = decl->get_location(); + + if (!allow_reserved_names) + validate_identifier(decl->identifier, loc, state); const struct glsl_type *field_type = process_array_type(&loc, decl_type, decl->array_specifier, state); @@ -6183,42 +6345,6 @@ ast_process_structure_or_interface_block(exec_list *instructions, fields[i].patch = qual->flags.q.patch ? 1 : 0; fields[i].precision = qual->precision; - /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: - * - * "A block member may be declared with a stream identifier, but - * the specified stream must match the stream associated with the - * containing block." 
- */ - if (qual->flags.q.explicit_stream && - qual->stream != layout->stream) { - _mesa_glsl_error(&loc, state, "stream layout qualifier on " - "interface block member `%s' does not match " - "the interface block (%d vs %d)", - fields[i].name, qual->stream, layout->stream); - } - - if (qual->flags.q.row_major || qual->flags.q.column_major) { - if (!qual->flags.q.uniform && !qual->flags.q.buffer) { - _mesa_glsl_error(&loc, state, - "row_major and column_major can only be " - "applied to interface blocks"); - } else - validate_matrix_layout_for_type(state, &loc, field_type, NULL); - } - - if (qual->flags.q.uniform && qual->has_interpolation()) { - _mesa_glsl_error(&loc, state, - "interpolation qualifiers cannot be used " - "with uniform interface blocks"); - } - - if ((qual->flags.q.uniform || !is_interface) && - qual->has_auxiliary_storage()) { - _mesa_glsl_error(&loc, state, - "auxiliary storage qualifiers cannot be used " - "in uniform blocks or structures."); - } - /* Propogate row- / column-major information down the fields of the * structure or interface block. Structures need this data because * the structure may contain a structure that contains ... a matrix @@ -6248,29 +6374,20 @@ ast_process_structure_or_interface_block(exec_list *instructions, * be defined inside shader storage buffer objects */ if (layout && var_mode == ir_var_shader_storage) { - if (qual->flags.q.read_only && qual->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "buffer variable `%s' can't be " - "readonly and writeonly.", fields[i].name); - } - /* For readonly and writeonly qualifiers the field definition, * if set, overwrites the layout qualifier. 
*/ - bool read_only = layout->flags.q.read_only; - bool write_only = layout->flags.q.write_only; - if (qual->flags.q.read_only) { - read_only = true; - write_only = false; + fields[i].image_read_only = true; + fields[i].image_write_only = false; } else if (qual->flags.q.write_only) { - read_only = false; - write_only = true; + fields[i].image_read_only = false; + fields[i].image_write_only = true; + } else { + fields[i].image_read_only = layout->flags.q.read_only; + fields[i].image_write_only = layout->flags.q.write_only; } - fields[i].image_read_only = read_only; - fields[i].image_write_only = write_only; - /* For other qualifiers, we set the flag if either the layout * qualifier or the field qualifier are set */ @@ -6328,16 +6445,16 @@ ast_struct_specifier::hir(exec_list *instructions, glsl_struct_field *fields; unsigned decl_count = - ast_process_structure_or_interface_block(instructions, - state, - &this->declarations, - loc, - &fields, - false, - GLSL_MATRIX_LAYOUT_INHERITED, - false /* allow_reserved_names */, - ir_var_auto, - NULL); + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + NULL, + 0 /* for interface only */); validate_identifier(this->name, loc, state); @@ -6483,17 +6600,36 @@ ast_interface_block::hir(exec_list *instructions, */ state->struct_specifier_depth++; + /* For blocks that accept memory qualifiers (i.e. 
shader storage), verify + * that we don't have incompatible qualifiers + */ + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { + _mesa_glsl_error(&loc, state, + "Interface block sets both readonly and writeonly"); + } + + unsigned qual_stream; + if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream, + &qual_stream) || + !validate_stream_qualifier(&loc, state, qual_stream)) { + /* If the stream qualifier is invalid it doesn't make sense to continue + * on and try to compare stream layouts on member variables against it + * so just return early. + */ + return NULL; + } + unsigned int num_variables = - ast_process_structure_or_interface_block(&declared_variables, - state, - &this->declarations, - loc, - &fields, - true, - matrix_layout, - redeclaring_per_vertex, - var_mode, - &this->layout); + ast_process_struct_or_iface_block_members(&declared_variables, + state, + &this->declarations, + &fields, + true, + matrix_layout, + redeclaring_per_vertex, + var_mode, + &this->layout, + qual_stream); state->struct_specifier_depth--; @@ -6604,6 +6740,8 @@ ast_interface_block::hir(exec_list *instructions, earlier_per_vertex->fields.structure[j].sample; fields[i].patch = earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; } } @@ -6633,8 +6771,6 @@ ast_interface_block::hir(exec_list *instructions, num_variables, packing, this->block_name); - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_type, &this->layout); if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { YYLTYPE loc = this->get_location(); @@ -6765,10 +6901,6 @@ ast_interface_block::hir(exec_list *instructions, "not allowed"); } - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_array_type, - &this->layout); - var = new(state) ir_variable(block_array_type, this->instance_name, var_mode); @@ -6830,14 
+6962,12 @@ ast_interface_block::hir(exec_list *instructions, earlier->reinit_interface_type(block_type); delete var; } else { - /* Propagate the "binding" keyword into this UBO's fields; - * the UBO declaration itself doesn't get an ir_variable unless it - * has an instance name. This is ugly. - */ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; state->symbols->add_variable(var); instructions->push_tail(var); @@ -6857,7 +6987,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; var->init_interface_type(block_type); if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) @@ -6914,8 +7044,10 @@ ast_interface_block::hir(exec_list *instructions, * The UBO declaration itself doesn't get an ir_variable unless it * has an instance name. This is ugly. */ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } if (var->type->is_unsized_array()) { if (var->is_in_shader_storage_block()) { @@ -6997,22 +7129,18 @@ ast_tcs_output_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any tessellation control output layout declaration preceded this - * one, make sure it was consistent with this one. 
- */ - if (state->tcs_output_vertices_specified && - state->out_qualifier->vertices != this->vertices) { - _mesa_glsl_error(&loc, state, - "tessellation control shader output layout does not " - "match previous declaration"); - return NULL; + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; } /* If any shader outputs occurred before this declaration and specified an * array size, make sure the size they specified is consistent with the * primitive type. */ - unsigned num_vertices = this->vertices; if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { _mesa_glsl_error(&loc, state, "this tessellation control shader output layout " @@ -7120,20 +7248,6 @@ ast_cs_input_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any compute input layout declaration preceded this one, make sure it - * was consistent with this one. - */ - if (state->cs_input_local_size_specified) { - for (int i = 0; i < 3; i++) { - if (state->cs_input_local_size[i] != this->local_size[i]) { - _mesa_glsl_error(&loc, state, - "compute shader input layout does not match" - " previous declaration"); - return NULL; - } - } - } - /* From the ARB_compute_shader specification: * * If the local size of the shader in any dimension is greater @@ -7146,15 +7260,30 @@ ast_cs_input_layout::hir(exec_list *instructions, * report it at compile time as well. 
*/ GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; for (int i = 0; i < 3; i++) { - if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { _mesa_glsl_error(&loc, state, "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" " (%d)", 'x' + i, state->ctx->Const.MaxComputeWorkGroupSize[i]); break; } - total_invocations *= this->local_size[i]; + total_invocations *= qual_local_size[i]; if (total_invocations > state->ctx->Const.MaxComputeWorkGroupInvocations) { _mesa_glsl_error(&loc, state, @@ -7165,9 +7294,23 @@ ast_cs_input_layout::hir(exec_list *instructions, } } + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. 
+ */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + state->cs_input_local_size_specified = true; for (int i = 0; i < 3; i++) - state->cs_input_local_size[i] = this->local_size[i]; + state->cs_input_local_size[i] = qual_local_size[i]; /* We may now declare the built-in constant gl_WorkGroupSize (see * builtin_variable_generator::generate_constants() for why we didn't @@ -7182,7 +7325,7 @@ ast_cs_input_layout::hir(exec_list *instructions, ir_constant_data data; memset(&data, 0, sizeof(data)); for (int i = 0; i < 3; i++) - data.u[i] = this->local_size[i]; + data.u[i] = qual_local_size[i]; var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); var->constant_initializer = new(var) ir_constant(glsl_type::uvec3_type, &data); @@ -7198,6 +7341,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, { bool gl_FragColor_assigned = false; bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; bool user_defined_fs_output_assigned = false; ir_variable *user_defined_fs_output = NULL; @@ -7215,6 +7360,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, gl_FragColor_assigned = true; else if (strcmp(var->name, "gl_FragData") == 0) gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; else if (!is_gl_identifier(var->name)) { if (state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out) { @@ -7246,11 +7395,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(&loc, state, "fragment shader writes 
to both " "`gl_FragColor' and `%s'", user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { _mesa_glsl_error(&loc, state, "fragment shader writes to both " "`gl_FragData' and `%s'", user_defined_fs_output->name); } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 79134c19893..03ed4dcfa2a 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -38,13 +38,16 @@ ast_type_specifier::print(void) const } bool -ast_fully_specified_type::has_qualifiers() const +ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const { /* 'subroutine' isnt a real qualifier. 
*/ ast_type_qualifier subroutine_only; subroutine_only.flags.i = 0; subroutine_only.flags.q.subroutine = 1; subroutine_only.flags.q.subroutine_def = 1; + if (state->has_explicit_uniform_location()) { + subroutine_only.flags.q.explicit_index = 1; + } return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0; } @@ -169,41 +172,32 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.max_vertices) { - if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) { + if (this->max_vertices) { + this->max_vertices->merge_qualifier(q.max_vertices); + } else { + this->max_vertices = q.max_vertices; + } + } + + if (q.flags.q.subroutine_def) { + if (this->flags.q.subroutine_def) { _mesa_glsl_error(loc, state, - "geometry shader set conflicting max_vertices " - "(%d and %d)", this->max_vertices, q.max_vertices); - return false; + "conflicting subroutine qualifiers used"); + } else { + this->subroutine_list = q.subroutine_list; } - this->max_vertices = q.max_vertices; } if (q.flags.q.invocations) { - if (this->flags.q.invocations && this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "geometry shader set conflicting invocations " - "(%d and %d)", this->invocations, q.invocations); - return false; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; } - this->invocations = q.invocations; } if (state->stage == MESA_SHADER_GEOMETRY && state->has_explicit_attrib_stream()) { - if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - q.stream, state->ctx->Const.MaxVertexStreams - 1); - } - if (this->flags.q.explicit_stream && - this->stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - this->stream, state->ctx->Const.MaxVertexStreams - 
1); - } - if (!this->flags.q.explicit_stream) { if (q.flags.q.stream) { this->flags.q.stream = 1; @@ -222,14 +216,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.vertices) { - if (this->flags.q.vertices && this->vertices != q.vertices) { - _mesa_glsl_error(loc, state, - "tessellation control shader set conflicting " - "vertices (%d and %d)", - this->vertices, q.vertices); - return false; + if (this->vertices) { + this->vertices->merge_qualifier(q.vertices); + } else { + this->vertices = q.vertices; } - this->vertices = q.vertices; } if (q.flags.q.vertex_spacing) { @@ -266,15 +257,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, for (int i = 0; i < 3; i++) { if (q.flags.q.local_size & (1 << i)) { - if ((this->flags.q.local_size & (1 << i)) && - this->local_size[i] != q.local_size[i]) { - _mesa_glsl_error(loc, state, - "compute shader set conflicting values for " - "local_size_%c (%d and %d)", 'x' + i, - this->local_size[i], q.local_size[i]); - return false; + if (this->local_size[i]) { + this->local_size[i]->merge_qualifier(q.local_size[i]); + } else { + this->local_size[i] = q.local_size[i]; } - this->local_size[i] = q.local_size[i]; } } @@ -313,7 +300,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, const bool r = this->merge_qualifier(loc, state, q); if (state->stage == MESA_SHADER_TESS_CTRL) { - node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices); + node = new(mem_ctx) ast_tcs_output_layout(*loc); } return r; @@ -417,15 +404,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, state->in_qualifier->prim_type = q.prim_type; } - if (this->flags.q.invocations && - q.flags.q.invocations && - this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "conflicting invocations counts specified"); - return false; - } else if (q.flags.q.invocations) { + if (q.flags.q.invocations) { this->flags.q.invocations = 1; - this->invocations = q.invocations; + if (this->invocations) { + 
this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } } if (q.flags.q.early_fragment_tests) { @@ -468,15 +453,67 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, if (create_gs_ast) { node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); } else if (create_cs_ast) { - /* Infer a local_size of 1 for every unspecified dimension */ - unsigned local_size[3]; - for (int i = 0; i < 3; i++) { - if (q.flags.q.local_size & (1 << i)) - local_size[i] = q.local_size[i]; - else - local_size[i] = 1; + node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); + } + + return true; +} + +bool +ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, + bool can_be_zero) +{ + int min_value = 0; + bool first_pass = true; + *value = 0; + + if (!can_be_zero) + min_value = 1; + + for (exec_node *node = layout_const_expressions.head; + !node->is_tail_sentinel(); node = node->next) { + + exec_list dummy_instructions; + ast_node *const_expression = exec_node_data(ast_node, node, link); + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < min_value) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " + "(%d < %d)", qual_indentifier, + const_int->value.i[0], min_value); + return false; } - node = new(mem_ctx) ast_cs_input_layout(*loc, local_size); + + if (!first_pass && *value != const_int->value.u[0]) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier does not " + "match previous 
declaration (%d vs %d)", + qual_indentifier, *value, const_int->value.i[0]); + return false; + } else { + first_pass = false; + *value = const_int->value.u[0]; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); } return true; diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 13494446b59..881ee2b6b55 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -290,6 +290,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state) } static bool +texture_samples_identical(const _mesa_glsl_parse_state *state) +{ + return texture_multisample(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool +texture_samples_identical_array(const _mesa_glsl_parse_state *state) +{ + return texture_multisample_array(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) { return state->stage == MESA_SHADER_FRAGMENT && @@ -724,6 +738,7 @@ private: BA2(textureQueryLod); B1(textureQueryLevels); + BA2(textureSamplesIdentical); B1(dFdx); B1(dFdy); B1(fwidth); @@ -2210,6 +2225,16 @@ builtin_builder::create_builtins() NULL); + add_function("textureSamplesIdenticalEXT", + _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, 
glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + add_function("texture1D", _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), @@ -3573,7 +3598,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { - infinities.f[i] = INFINITY; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + infinities.f[i] = INFINITY; + break; + case GLSL_TYPE_DOUBLE: + infinities.d[i] = INFINITY; + break; + default: + unreachable("unknown type"); + } } body.emit(ret(equal(abs(x), imm(type, infinities)))); @@ -4675,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) return sig; } +ir_function_signature * +builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail, + const glsl_type *sampler_type, + const glsl_type *coord_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + const glsl_type *return_type = glsl_type::bool_type; + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical); + tex->coordinate = var_ref(P); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) @@ -5243,8 +5296,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type, ir_function_signature * builtin_builder::_image_samples_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags) + unsigned /* num_arguments */, + unsigned /* flags */) { 
ir_variable *image = in_var(image_type, "image"); ir_function_signature *sig = diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index b06c1bc5c12..e8eab808a19 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type, this->fields[this->num_fields].centroid = 0; this->fields[this->num_fields].sample = 0; this->fields[this->num_fields].patch = 0; + this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; this->num_fields++; } @@ -376,6 +377,11 @@ private: return add_variable(name, type, ir_var_shader_out, slot); } + ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) + { + return add_index_variable(name, type, ir_var_shader_out, slot, index); + } + ir_variable *add_system_value(int slot, const glsl_type *type, const char *name) { @@ -384,6 +390,8 @@ private: ir_variable *add_variable(const char *name, const glsl_type *type, enum ir_variable_mode mode, int slot); + ir_variable *add_index_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot, int index); ir_variable *add_uniform(const glsl_type *type, const char *name); ir_variable *add_const(const char *name, int value); ir_variable *add_const_ivec3(const char *name, int x, int y, int z); @@ -429,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator( { } +ir_variable * +builtin_variable_generator::add_index_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot, int index) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables 
that are added using this function should be + * uniforms, shader storage, shader inputs, and shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 1; + var->data.index = index; + + /* Once the variable is created an initialized, add it to the symbol table + * and add the declaration to the IR stream. + */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} ir_variable * builtin_variable_generator::add_variable(const char *name, @@ -580,6 +628,14 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxVaryingVectors", state->ctx->Const.MaxVarying); } + + /* EXT_blend_func_extended brings a built in constant + * for determining number of dual source draw buffers + */ + if (state->EXT_blend_func_extended_enable) { + add_const("gl_MaxDualSourceDrawBuffersEXT", + state->Const.MaxDualSourceDrawBuffers); + } } else { add_const("gl_MaxVertexUniformComponents", state->Const.MaxVertexUniformComponents); @@ -1016,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars() array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); } + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { + /* We make an assumption here that there will only ever be one dual-source draw buffer + * In case this assumption is ever proven to be false, make sure to assert here + * since we don't handle this case. + * In practice, this issue will never arise since no hardware will support it. + */ + assert(state->Const.MaxDualSourceDrawBuffers <= 1); + add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_DATA0, 1, + array(vec4_t, state->Const.MaxDualSourceDrawBuffers), + "gl_SecondaryFragDataEXT"); + } + /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL * ES 1.00. 
*/ @@ -1186,6 +1255,7 @@ builtin_variable_generator::generate_varyings() var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; + var->data.precision = fields[i].precision; var->init_interface_type(per_vertex_out_type); } } diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 6aa7abec00e..2fd4cf04079 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_standard_derivatives", 1); if (extensions->ARB_texture_multisample) add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); + if (extensions->ARB_blend_func_extended) + add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); } } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); @@ -2510,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions != NULL) { if (extensions->EXT_shader_integer_mix) add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1); + + if (extensions->EXT_shader_samples_identical) + add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1); } if (version >= 150) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index adf6a05acce..5a8f98019d1 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <node> conditionopt %type <node> for_init_statement %type <for_rest_statement> for_rest_statement -%type <n> integer_constant %type <node> layout_defaults %right THEN ELSE @@ -1152,11 +1151,6 @@ layout_qualifier_id_list: } ; -integer_constant: - INTCONSTANT { $$ = $1; } - | UINTCONSTANT { $$ = $1; } - ; - layout_qualifier_id: any_identifier { @@ -1453,9 +1447,18 @@ layout_qualifier_id: YYERROR; } } - | any_identifier '=' integer_constant + | 
any_identifier '=' constant_expression { memset(& $$, 0, sizeof($$)); + void *ctx = state; + + if ($3->oper != ast_int_constant && + $3->oper != ast_uint_constant && + !state->has_enhanced_layouts()) { + _mesa_glsl_error(& @1, state, + "compile-time constant expressions require " + "GLSL 4.40 or ARB_enhanced_layouts"); + } if (match_layout_qualifier("location", $1, state) == 0) { $$.flags.q.explicit_location = 1; @@ -1466,24 +1469,17 @@ layout_qualifier_id: "GL_ARB_explicit_attrib_location layout " "identifier `%s' used", $1); } - - if ($3 >= 0) { - $$.location = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid location %d specified", $3); - YYERROR; - } + $$.location = $3; } if (match_layout_qualifier("index", $1, state) == 0) { - $$.flags.q.explicit_index = 1; - - if ($3 >= 0) { - $$.index = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid index %d specified", $3); + if (state->es_shader && !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); YYERROR; } + + $$.flags.q.explicit_index = 1; + $$.index = $3; } if ((state->has_420pack() || @@ -1502,18 +1498,11 @@ layout_qualifier_id: if (match_layout_qualifier("max_vertices", $1, state) == 0) { $$.flags.q.max_vertices = 1; - - if ($3 < 0) { + $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(150, 0)) { _mesa_glsl_error(& @3, state, - "invalid max_vertices %d specified", $3); - YYERROR; - } else { - $$.max_vertices = $3; - if (!state->is_version(150, 0)) { - _mesa_glsl_error(& @3, state, - "#version 150 max_vertices qualifier " - "specified", $3); - } + "#version 150 max_vertices qualifier " + "specified", $3); } } @@ -1521,15 +1510,8 @@ layout_qualifier_id: if (match_layout_qualifier("stream", $1, state) == 0 && state->check_explicit_attrib_stream_allowed(& @3)) { $$.flags.q.stream = 1; - - if ($3 < 0) { - _mesa_glsl_error(& @3, state, - "invalid stream %d specified", $3); - YYERROR; - } else 
{ - $$.flags.q.explicit_stream = 1; - $$.stream = $3; - } + $$.flags.q.explicit_stream = 1; + $$.stream = $3; } } @@ -1541,12 +1523,7 @@ layout_qualifier_id: for (int i = 0; i < 3; i++) { if (match_layout_qualifier(local_size_qualifiers[i], $1, state) == 0) { - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid %s of %d specified", - local_size_qualifiers[i], $3); - YYERROR; - } else if (!state->has_compute_shader()) { + if (!state->has_compute_shader()) { _mesa_glsl_error(& @3, state, "%s qualifier requires GLSL 4.30 or " "GLSL ES 3.10 or ARB_compute_shader", @@ -1554,7 +1531,7 @@ layout_qualifier_id: YYERROR; } else { $$.flags.q.local_size |= (1 << i); - $$.local_size[i] = $3; + $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); } break; } @@ -1562,48 +1539,24 @@ layout_qualifier_id: if (match_layout_qualifier("invocations", $1, state) == 0) { $$.flags.q.invocations = 1; - - if ($3 <= 0) { + $$.invocations = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable) { _mesa_glsl_error(& @3, state, - "invalid invocations %d specified", $3); - YYERROR; - } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) { - _mesa_glsl_error(& @3, state, - "invocations (%d) exceeds " - "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3); - YYERROR; - } else { - $$.invocations = $3; - if (!state->is_version(400, 0) && - !state->ARB_gpu_shader5_enable) { - _mesa_glsl_error(& @3, state, - "GL_ARB_gpu_shader5 invocations " - "qualifier specified", $3); - } + "GL_ARB_gpu_shader5 invocations " + "qualifier specified", $3); } } /* Layout qualifiers for tessellation control shaders. 
*/ if (match_layout_qualifier("vertices", $1, state) == 0) { $$.flags.q.vertices = 1; - - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid vertices (%d) specified", $3); - YYERROR; - } else if ($3 > (int)state->Const.MaxPatchVertices) { - _mesa_glsl_error(& @3, state, - "vertices (%d) exceeds " - "GL_MAX_PATCH_VERTICES", $3); - YYERROR; - } else { - $$.vertices = $3; - if (!state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "vertices qualifier requires GLSL 4.00 or " - "ARB_tessellation_shader"); - } + $$.vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertices qualifier requires GLSL 4.00 or " + "ARB_tessellation_shader"); } } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 02584c62a4d..b41b64af2c1 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -104,6 +104,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + /* 1.50 constants */ this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; @@ -646,9 +648,11 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(AMD_shader_trinary_minmax, true, false, dummy_true), EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), EXT(EXT_draw_buffers, false, true, dummy_true), EXT(EXT_separate_shader_objects, false, true, dummy_true), EXT(EXT_shader_integer_mix, true, true, 
EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), EXT(EXT_texture_array, true, false, EXT_texture_array), }; @@ -1646,8 +1650,20 @@ set_shader_inout_layout(struct gl_shader *shader, switch (shader->Stage) { case MESA_SHADER_TESS_CTRL: shader->TessCtrl.VerticesOut = 0; - if (state->tcs_output_vertices_specified) - shader->TessCtrl.VerticesOut = state->out_qualifier->vertices; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } break; case MESA_SHADER_TESS_EVAL: shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; @@ -1668,8 +1684,14 @@ set_shader_inout_layout(struct gl_shader *shader, break; case MESA_SHADER_GEOMETRY: shader->Geom.VerticesOut = 0; - if (state->out_qualifier->flags.q.max_vertices) - shader->Geom.VerticesOut = state->out_qualifier->max_vertices; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } if (state->gs_input_prim_type_specified) { shader->Geom.InputType = state->in_qualifier->prim_type; @@ -1684,8 +1706,22 @@ set_shader_inout_layout(struct gl_shader *shader, } shader->Geom.Invocations = 0; - if (state->in_qualifier->flags.q.invocations) - shader->Geom.Invocations = state->in_qualifier->invocations; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = 
state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } break; case MESA_SHADER_COMPUTE: @@ -1797,6 +1833,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, if (shader->InfoLog) ralloc_free(shader->InfoLog); + if (!state->error) + set_shader_inout_layout(shader, state); + shader->symbols = new(shader->ir) glsl_symbol_table; shader->CompileStatus = !state->error; shader->InfoLog = state->info_log; @@ -1804,9 +1843,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, shader->IsES = state->es_shader; shader->uses_builtin_functions = state->uses_builtin_functions; - if (!state->error) - set_shader_inout_layout(shader, state); - /* Retain any live IR, but trash the rest. */ reparent_ir(shader->ir, shader->ir); diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 1d8c1b8799f..17ff0b5af79 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state { /* ARB_draw_buffers */ unsigned MaxDrawBuffers; + /* ARB_blend_func_extended */ + unsigned MaxDualSourceDrawBuffers; + /* 3.00 ES */ int MinProgramTexelOffset; int MaxProgramTexelOffset; @@ -595,12 +598,16 @@ struct _mesa_glsl_parse_state { bool AMD_vertex_shader_layer_warn; bool AMD_vertex_shader_viewport_index_enable; bool AMD_vertex_shader_viewport_index_warn; + bool EXT_blend_func_extended_enable; + bool EXT_blend_func_extended_warn; bool EXT_draw_buffers_enable; bool EXT_draw_buffers_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; bool EXT_shader_integer_mix_enable; bool EXT_shader_integer_mix_warn; + bool EXT_shader_samples_identical_enable; + bool EXT_shader_samples_identical_warn; bool EXT_texture_array_enable; 
bool EXT_texture_array_warn; /*@}*/ diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 8933b230177..ca520f547a1 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1421,12 +1421,11 @@ ir_dereference::is_lvalue() const } -static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" }; +static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; const char *ir_texture::opcode_string() { - assert((unsigned int) op <= - sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0])); + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); return tex_opcode_strs[op]; } @@ -1456,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) } else if (this->op == ir_lod) { assert(type->vector_elements == 2); assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); } else { assert(sampler->type->sampler_type == (int) type->base_type); if (sampler->type->sampler_shadow) @@ -1676,6 +1679,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.interpolation = INTERP_QUALIFIER_NONE; this->data.max_array_access = 0; this->data.atomic.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; this->data.image_read_only = false; this->data.image_write_only = false; this->data.image_coherent = false; @@ -1842,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params) ir_function::ir_function(const char *name) : ir_instruction(ir_type_function) { + this->subroutine_index = -1; this->name = ralloc_strdup(this, name); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index d59dee1e369..e1109eec1d3 100644 --- 
a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1171,6 +1171,8 @@ public: */ int num_subroutine_types; const struct glsl_type **subroutine_types; + + int subroutine_index; }; inline const char *ir_function_signature::function_name() const @@ -1965,6 +1967,7 @@ enum ir_texture_opcode { ir_tg4, /**< Texture gather */ ir_query_levels, /**< Texture levels query */ ir_texture_samples, /**< Texture samples query */ + ir_samples_identical, /**< Query whether all samples are definitely identical. */ }; @@ -1991,6 +1994,7 @@ enum ir_texture_opcode { * (lod <type> <sampler> <coordinate>) * (tg4 <type> <sampler> <coordinate> <offset> <component>) * (query_levels <type> <sampler>) + * (samples_identical <sampler> <coordinate>) */ class ir_texture : public ir_rvalue { public: diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index d6b06eeec87..2aef4fcb4ac 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht); @@ -269,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const ir_function *copy = new(mem_ctx) ir_function(this->name); copy->is_subroutine = this->is_subroutine; + copy->subroutine_index = this->subroutine_index; copy->num_subroutine_types = this->num_subroutine_types; copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types); for (int i = 0; i < copy->num_subroutine_types; i++) diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp index 5f0785e0ece..b86f4ea16bb 100644 --- a/src/glsl/ir_equals.cpp +++ b/src/glsl/ir_equals.cpp @@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const return false; for (unsigned i = 0; i < type->components(); i++) { - if (value.u[i] != other->value.u[i]) - return false; + if 
(type->base_type == GLSL_TYPE_DOUBLE) { + if (value.d[i] != other->value.d[i]) + return false; + } else { + if (value.u[i] != other->value.u[i]) + return false; + } } return true; @@ -152,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (!lod_info.bias->equals(other->lod_info.bias, ignore)) diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp index 6495cc4581d..213992af28c 100644 --- a/src/glsl/ir_hv_accept.cpp +++ b/src/glsl/ir_hv_accept.cpp @@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: s = this->lod_info.bias->accept(v); diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 42b03fdea52..fd7bc2eea98 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir) { fprintf(f, "(%s ", ir->opcode_string()); + if (ir->op == ir_samples_identical) { + ir->sampler->accept(this); + fprintf(f, " "); + ir->coordinate->accept(this); + fprintf(f, ")"); + return; + } + print_type(f, ir->type); fprintf(f, " "); @@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir) case ir_tg4: ir->lod_info.component->accept(this); break; + case ir_samples_identical: + unreachable(!"ir_samples_identical was already handled"); }; fprintf(f, ")"); } diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp index a6966f546bc..6486838b8b8 100644 --- a/src/glsl/ir_rvalue_visitor.cpp +++ b/src/glsl/ir_rvalue_visitor.cpp @@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: handle_rvalue(&ir->lod_info.bias); diff --git a/src/glsl/link_varyings.cpp 
b/src/glsl/link_varyings.cpp index 7e77a675db1..c0b4b3e820c 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -766,7 +766,7 @@ public: gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(); + unsigned assign_locations(uint64_t reserved_slots); void store_locations() const; private: @@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) * passed to varying_matches::record(). */ unsigned -varying_matches::assign_locations() +varying_matches::assign_locations(uint64_t reserved_slots) { /* Sort varying matches into an order that makes them easy to pack. */ qsort(this->matches, this->num_matches, sizeof(*this->matches), @@ -1013,6 +1013,10 @@ varying_matches::assign_locations() != this->matches[i].packing_class) { *location = ALIGN(*location, 4); } + while ((*location < MAX_VARYING * 4u) && + (reserved_slots & (1u << *location / 4u))) { + *location = ALIGN(*location + 1, 4); + } this->matches[i].generic_location = *location; @@ -1376,6 +1380,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) } /** + * Generate a bitfield map of the explicit locations for shader varyings. + * + * In theory a 32 bits value will be enough but a 64 bits value is future proof. 
+ */ +uint64_t +reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */ + + uint64_t slots = 0; + int var_slot; + + if (!stage) + return slots; + + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location) + continue; + + var_slot = var->data.location - VARYING_SLOT_VAR0; + if (var_slot >= 0 && var_slot < MAX_VARYING) + slots |= 1u << var_slot; + } + + return slots; +} + + +/** * Assign locations for all variables that are produced in one pipeline stage * (the "producer") and consumed in the next stage (the "consumer"). * @@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx, matches.record(matched_candidate->toplevel_var, NULL); } - const unsigned slots_used = matches.assign_locations(); + const uint64_t reserved_slots = + reserved_varying_slot(producer, ir_var_shader_out) | + reserved_varying_slot(consumer, ir_var_shader_in); + + const unsigned slots_used = matches.assign_locations(reserved_slots); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index db00f8febc6..331d9a28007 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types = ralloc_array(sh, const struct glsl_type *, fn->num_subroutine_types); + + /* From Section 4.4.4(Subroutine Function Layout Qualifiers) of the + * GLSL 4.5 spec: + * + * "Each subroutine with an index qualifier in the shader must be + * given a unique index, otherwise a compile or link error will be + * generated." 
+ */ + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + if (sh->SubroutineFunctions[j].index != -1 && + sh->SubroutineFunctions[j].index == fn->subroutine_index) { + linker_error(prog, "each subroutine index qualifier in the " + "shader must be unique\n"); + return; + } + } + sh->SubroutineFunctions[sh->NumSubroutineFunctions].index = + fn->subroutine_index; + for (int j = 0; j < fn->num_subroutine_types; j++) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j]; sh->NumSubroutineFunctions++; } + + /* Assign index for subroutines without an explicit index*/ + int index = 0; + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + while (sh->SubroutineFunctions[j].index == -1) { + for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) { + if (sh->SubroutineFunctions[k].index == index) + break; + else if (k == sh->NumSubroutineFunctions - 1) + sh->SubroutineFunctions[j].index = index; + } + index++; + } + } } } diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index d8df3544f10..a26300d1d26 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -31,6 +31,7 @@ #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" #include "ir.h" +#include "main/imports.h" /* * pass to lower GLSL IR to NIR @@ -147,16 +148,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, nir_lower_outputs_to_temporaries(shader); - /* TODO: Use _mesa_fls instead */ - unsigned num_textures = 0; - for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++) - if (sh->Program->SamplersUsed & (1 << i)) - num_textures = i; - shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) shader->info.label = ralloc_strdup(shader, shader_prog->Label); - shader->info.num_textures = num_textures; + shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); shader->info.num_ubos = sh->NumUniformBlocks; shader->info.num_abos = 
shader_prog->NumAtomicBuffers; shader->info.num_ssbos = sh->NumShaderStorageBlocks; @@ -174,6 +169,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader_prog->TransformFeedback.NumVarying > 0; switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + case MESA_SHADER_GEOMETRY: shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; shader->info.gs.output_primitive = sh->Geom.OutputType; @@ -244,6 +243,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) unsigned total_elems = ir->type->components(); unsigned i; + + ret->num_elements = 0; switch (ir->type->base_type) { case GLSL_TYPE_UINT: for (i = 0; i < total_elems; i++) @@ -268,6 +269,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_STRUCT: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; + i = 0; foreach_in_list(ir_constant, field, &ir->components) { ret->elements[i] = constant_copy(field, mem_ctx); @@ -278,6 +281,7 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_ARRAY: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; for (i = 0; i < ir->type->length; i++) ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); @@ -297,15 +301,6 @@ nir_visitor::visit(ir_variable *ir) var->type = ir->type; var->name = ralloc_strdup(var, ir->name); - if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) { - unsigned size = ir->get_interface_type()->length; - var->max_ifc_array_access = ralloc_array(var, unsigned, size); - memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(), - size * sizeof(unsigned)); - } else { - var->max_ifc_array_access = NULL; - } - var->data.read_only = ir->data.read_only; var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; @@ -1543,9 +1538,9 @@ nir_visitor::visit(ir_expression *ir) result = 
supports_ints ? nir_ior(&b, srcs[0], srcs[1]) : nir_for(&b, srcs[0], srcs[1]); break; - case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; - result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) - : nir_for(&b, srcs[0], srcs[1]); + case ir_binop_logic_xor: + result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) + : nir_fxor(&b, srcs[0], srcs[1]); break; case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; case ir_binop_rshift: @@ -1808,6 +1803,11 @@ nir_visitor::visit(ir_texture *ir) num_srcs = 0; break; + case ir_samples_identical: + op = nir_texop_samples_identical; + num_srcs = 1; /* coordinate */ + break; + default: unreachable("not reached"); } @@ -1835,8 +1835,9 @@ nir_visitor::visit(ir_texture *ir) case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_BOOL: case GLSL_TYPE_UINT: - instr->dest_type = nir_type_unsigned; + instr->dest_type = nir_type_uint; break; default: unreachable("not reached"); diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 3e9d38f7707..64b5c0cb106 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -130,6 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, this->fields.structure[i].image_coherent = fields[i].image_coherent; this->fields.structure[i].image_volatile = fields[i].image_volatile; this->fields.structure[i].image_restrict = fields[i].image_restrict; + this->fields.structure[i].precision = fields[i].precision; } mtx_unlock(&glsl_type::mutex); diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index 14c2aa49f85..1aafa5cd547 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -858,7 +858,7 @@ struct glsl_struct_field { /** * Precision qualifier */ - unsigned precision; + unsigned precision:2; /** * Image qualifiers, applicable to buffer variables defined in shader @@ -873,7 +873,8 @@ struct glsl_struct_field { #ifdef __cplusplus 
glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), - sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0) + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) { /* empty */ } diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 3157ff82d99..79df6d3df94 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -107,6 +107,10 @@ void nir_shader_add_variable(nir_shader *shader, nir_variable *var) { switch (var->data.mode) { + case nir_var_all: + assert(!"invalid mode"); + break; + case nir_var_local: assert(!"nir_shader_add_variable cannot be used for local variables"); break; @@ -312,6 +316,14 @@ nir_block_create(nir_shader *shader) block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; + /* XXX maybe it would be worth it to defer allocation? This + * way it doesn't get allocated for shader ref's that never run + * nir_calc_dominance? For example, state-tracker creates an + * initial IR, clones that, runs appropriate lowering pass, passes + * to driver which does common lowering/opt, and then stores ref + * which is later used to do state specific lowering and futher + * opt. Do any of the references not need dominance metadata? 
+ */ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -1306,21 +1318,62 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) { assert(!new_src.is_ssa || def != new_src.ssa); - nir_foreach_use_safe(def, use_src) { - nir_instr *src_parent_instr = use_src->parent_instr; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_instr); - src_add_all_uses(use_src, src_parent_instr, NULL); - } + nir_foreach_use_safe(def, use_src) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} + +static bool +is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) +{ + assert(start->block == end->block); + + if (between->block != start->block) + return false; + + /* Search backwards looking for "between" */ + while (start != end) { + if (between == end) + return true; - nir_foreach_if_use_safe(def, use_src) { - nir_if *src_parent_if = use_src->parent_if; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_if); - src_add_all_uses(use_src, NULL, src_parent_if); + end = nir_instr_prev(end); + assert(end); } + + return false; } +/* Replaces all uses of the given SSA def with the given source but only if + * the use comes after the after_me instruction. This can be useful if you + * are emitting code to fix up the result of some instruction: you can freely + * use the result in that code and then call rewrite_uses_after and pass the + * last fixup instruction as after_me and it will replace all of the uses you + * want without touching the fixup code. + * + * This function assumes that after_me is in the same block as + * def->parent_instr and that after_me comes after def->parent_instr. 
+ */ +void +nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me) +{ + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) { + assert(use_src->parent_instr != def->parent_instr); + /* Since def already dominates all of its uses, the only way a use can + * not be dominated by after_me is if it is between def and after_me in + * the instruction list. + */ + if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + } + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, bool reverse, void *state); @@ -1571,6 +1624,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_tess_level_inner; case SYSTEM_VALUE_VERTICES_IN: return nir_intrinsic_load_patch_vertices_in; + case SYSTEM_VALUE_HELPER_INVOCATION: + return nir_intrinsic_load_helper_invocation; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1614,6 +1669,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_TESS_LEVEL_INNER; case nir_intrinsic_load_patch_vertices_in: return SYSTEM_VALUE_VERTICES_IN; + case nir_intrinsic_load_helper_invocation: + return SYSTEM_VALUE_HELPER_INVOCATION; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index df0e6f1f54a..b7374e17407 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -82,6 +82,7 @@ typedef struct { } nir_state_slot; typedef enum { + nir_var_all = -1, nir_var_shader_in, nir_var_shader_out, nir_var_global, @@ -111,6 +112,11 @@ typedef struct nir_constant { */ union nir_constant_data value; + /* we could get this from the var->type but makes clone *much* easier to + * not have to care about the type. 
+ */ + unsigned num_elements; + /* Array elements / Structure Fields */ struct nir_constant **elements; } nir_constant; @@ -147,19 +153,6 @@ typedef struct { */ char *name; - /** - * For variables which satisfy the is_interface_instance() predicate, this - * points to an array of integers such that if the ith member of the - * interface block is an array, max_ifc_array_access[i] is the maximum - * array element of that member that has been accessed. If the ith member - * of the interface block is not an array, max_ifc_array_access[i] is - * unused. - * - * For variables whose type is not an interface block, this pointer is - * NULL. - */ - unsigned *max_ifc_array_access; - struct nir_variable_data { /** @@ -654,7 +647,7 @@ typedef enum { nir_type_invalid = 0, /* Not a valid type */ nir_type_float, nir_type_int, - nir_type_unsigned, + nir_type_uint, nir_type_bool } nir_alu_type; @@ -977,6 +970,9 @@ typedef enum { nir_texop_tg4, /**< Texture gather */ nir_texop_query_levels, /**< Texture levels query */ nir_texop_texture_samples, /**< Texture samples query */ + nir_texop_samples_identical, /**< Query whether all samples are definitely + * identical. + */ } nir_texop; typedef struct { @@ -1069,6 +1065,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: + case nir_texop_samples_identical: return 1; default: @@ -1079,6 +1076,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) } } +/* Returns true if this texture operation queries something about the texture + * rather than actually sampling it. 
+ */ +static inline bool +nir_tex_instr_is_query(nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_texture_samples: + case nir_texop_query_levels: + return true; + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_tg4: + return false; + default: + unreachable("Invalid texture opcode"); + } +} + static inline unsigned nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) { @@ -1353,6 +1375,7 @@ typedef enum { nir_metadata_block_index = 0x1, nir_metadata_dominance = 0x2, nir_metadata_live_ssa_defs = 0x4, + nir_metadata_not_properly_reset = 0x8, } nir_metadata; typedef struct { @@ -1578,6 +1601,11 @@ typedef struct nir_shader_info { struct { unsigned local_size[3]; } cs; + + struct { + /** The number of vertices in the TCS output patch. */ + unsigned vertices_out; + } tcs; }; } nir_shader_info; @@ -1910,6 +1938,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name); void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); +void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me); /* visits basic blocks in source-code order */ typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); @@ -1937,10 +1967,16 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); + #ifdef DEBUG void nir_validate_shader(nir_shader *shader); +void nir_metadata_set_validation_flag(nir_shader *shader); +void nir_metadata_check_validation_flag(nir_shader *shader); #else static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } +static inline void 
nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); @@ -2032,9 +2068,22 @@ typedef struct nir_lower_tex_options { unsigned saturate_s; unsigned saturate_t; unsigned saturate_r; + + /* Bitmask of samplers that need swizzling. + * + * If (swizzle_result & (1 << sampler_index)), then the swizzle in + * swizzles[sampler_index] is applied to the result of the texturing + * operation. + */ + unsigned swizzle_result; + + /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles + * while 4 and 5 represent 0 and 1 respectively. + */ + uint8_t swizzles[32][4]; } nir_lower_tex_options; -void nir_lower_tex(nir_shader *shader, +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options); void nir_lower_idiv(nir_shader *shader); diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 205aa067b0b..fe41c74b608 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -256,7 +256,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], { nir_alu_src alu_src = { NIR_SRC_INIT }; alu_src.src = nir_src_for_ssa(src); - for (int i = 0; i < 4; i++) + for (unsigned i = 0; i < num_components; i++) alu_src.swizzle[i] = swiz[i]; return use_fmov ? nir_fmov_alu(build, alu_src, num_components) : @@ -290,6 +290,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. + * + * See nir_ssa_for_alu_src() for alu instructions. 
*/ static inline nir_ssa_def * nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) @@ -305,6 +307,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) return nir_imov_alu(build, alu, num_components); } +/** + * Similar to nir_ssa_for_src(), but for alu src's, respecting the + * nir_alu_src's swizzle. + */ +static inline nir_ssa_def * +nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) +{ + static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; + nir_alu_src *src = &instr->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); + + if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) + return src->src.ssa; + + return nir_imov_alu(build, *src, num_components); +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c new file mode 100644 index 00000000000..68b72ef5381 --- /dev/null +++ b/src/glsl/nir/nir_clone.c @@ -0,0 +1,674 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_control_flow_private.h" + +/* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + +typedef struct { + /* maps orig ptr -> cloned ptr: */ + struct hash_table *ptr_table; + + /* List of phi sources. */ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; +} clone_state; + +static void +init_clone_state(clone_state *state) +{ + state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); +} + +static void +free_clone_state(clone_state *state) +{ + _mesa_hash_table_destroy(state->ptr_table, NULL); +} + +static void * +lookup_ptr(clone_state *state, const void *ptr) +{ + struct hash_entry *entry; + + if (!ptr) + return NULL; + + entry = _mesa_hash_table_search(state->ptr_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; +} + +static void +store_ptr(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->ptr_table, ptr, nptr); +} + +static nir_constant * +clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +{ + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); + 
for (unsigned i = 0; i < c->num_elements; i++) { + nc->elements[i] = clone_constant(state, c->elements[i], nvar); + } + + return nc; +} + +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ +static nir_variable * +clone_variable(clone_state *state, const nir_variable *var) +{ + nir_variable *nvar = rzalloc(state->ns, nir_variable); + store_ptr(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = + clone_constant(state, var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; +} + +/* clone list of nir_variable: */ +static void +clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } +} + +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ +static nir_register * +clone_register(clone_state *state, const nir_register *reg) +{ + nir_register *nreg = rzalloc(state->ns, nir_register); + store_ptr(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + list_inithead(&nreg->defs); + 
list_inithead(&nreg->if_uses); + + return nreg; +} + +/* clone list of nir_register: */ +static void +clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } +} + +static void +__clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) +{ + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { + nsrc->ssa = lookup_ptr(state, src->ssa); + } else { + nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } +} + +static void +__clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) +{ + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + store_ptr(state, &ndst->ssa, &dst->ssa); + } else { + ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } +} + +static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + +static nir_deref_var * +clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) +{ + nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; +} + +static nir_deref_array * +clone_deref_array(clone_state *state, const nir_deref_array *darr, + nir_instr *ninstr, 
nir_deref *parent) +{ + nir_deref_array *ndarr = nir_deref_array_create(parent); + + ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; +} + +static nir_deref_struct * +clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; +} + +static nir_deref * +clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) +{ + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } +} + +static nir_alu_instr * +clone_alu(clone_state *state, const nir_alu_instr *alu) +{ + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + sizeof(nalu->src[i].swizzle)); + } + + return nalu; +} + +static 
nir_intrinsic_instr * +clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) +{ + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; +} + +static nir_load_const_instr * +clone_load_const(clone_state *state, const nir_load_const_instr *lc) +{ + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + + store_ptr(state, &nlc->def, &lc->def); + + return nlc; +} + +static nir_ssa_undef_instr * +clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) +{ + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + + store_ptr(state, &nsa->def, &sa->def); + + return nsa; +} + +static nir_tex_instr * +clone_tex(clone_state *state, const nir_tex_instr *tex) +{ + nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + 
ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; + ntex->texture_index = tex->texture_index; + ntex->texture_array_size = tex->texture_array_size; + if (tex->texture) + ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); + ntex->sampler_index = tex->sampler_index; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; +} + +static nir_phi_instr * +clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) +{ + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instructions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources. + */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. */ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_insert_instr handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. + */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. 
We'll walk this list and fix up + * sources at the very end of clone_function_impl. + */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; +} + +static nir_jump_instr * +clone_jump(clone_state *state, const nir_jump_instr *jmp) +{ + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; +} + +static nir_call_instr * +clone_call(clone_state *state, const nir_call_instr *call) +{ + nir_function_overload *ncallee = lookup_ptr(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; +} + +static nir_instr * +clone_instr(clone_state *state, const nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } +} + +static nir_block * +clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) +{ + /* Don't actually 
create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; It should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ + store_ptr(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up. + */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; +} + +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + +static nir_if * +clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) +{ + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; +} + +static nir_loop * +clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) +{ + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; +} + +/* clone list of nir_cf_node: */ +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case 
nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } +} + +static nir_function_impl * +clone_function_impl(clone_state *state, const nir_function_impl *fi, + nir_function_overload *nfo) +{ + nir_function_impl *nfi = nir_function_impl_create(nfo); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { + nfi->params[i] = lookup_ptr(state, fi->params[i]); + } + nfi->return_var = lookup_ptr(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. 
+ */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = lookup_ptr(state, src->pred); + assert(src->src.is_ssa); + src->src.ssa = lookup_ptr(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; +} + +static nir_function_overload * +clone_function_overload(clone_state *state, const nir_function_overload *fo, + nir_function *nfxn) +{ + nir_function_overload *nfo = nir_function_overload_create(nfxn); + + /* Needed for call instructions */ + store_ptr(state, nfo, fo); + + nfo->num_params = fo->num_params; + nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params); + memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params); + + nfo->return_type = fo->return_type; + + /* At first glance, it looks like we should clone the function_impl here. + * However, call instructions need to be able to reference at least the + * overload and those will get processed as we clone the function_impl's. + * We stop here and do function_impls as a second pass. 
+ */ + + return nfo; +} + +static nir_function * +clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) +{ + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) + clone_function_overload(state, fo, nfxn); + + return nfxn; +} + +nir_shader * +nir_shader_clone(void *mem_ctx, const nir_shader *s) +{ + clone_state state; + init_clone_state(&state); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions and overloads */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all overloads are cloned can we clone the actual function + * implementations. This is because nir_call_instr's need to reference the + * overloads of other functions and we don't know what order the functions + * will have in the list. 
+ */ + nir_foreach_overload(s, fo) { + nir_function_overload *nfo = lookup_ptr(&state, fo); + clone_function_impl(&state, fo->impl, nfo); + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; + + free_clone_state(&state); + + return ns; +} diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index 2ba8554645d..b16ef503c92 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "unsigned", "bool"]: +% for type in ["float", "int", "uint", "bool"]: struct ${type}_vec { ${type} x; ${type} y; diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 0a134aff211..de30db61eea 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -244,6 +244,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ SYSTEM_VALUE(num_work_groups, 3, 0) +SYSTEM_VALUE(helper_invocation, 1, 0) /* * The format of the indices depends on the type of the load. 
For uniforms, diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index 31ccfb2c02b..c58c7785b3f 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc, if (output) { exec_list_push_tail(&shader->outputs, &var->node); + shader->num_outputs++; /* TODO use type_size() */ } else { exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ } return var; } diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c index c961178c53a..f64b3eac8a0 100644 --- a/src/glsl/nir/nir_lower_idiv.c +++ b/src/glsl/nir/nir_lower_idiv.c @@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) bld->cursor = nir_before_instr(&alu->instr); - numer = nir_ssa_for_src(bld, alu->src[0].src, - nir_ssa_alu_instr_src_components(alu, 0)); - denom = nir_ssa_for_src(bld, alu->src[1].src, - nir_ssa_alu_instr_src_components(alu, 1)); + numer = nir_ssa_for_alu_src(bld, alu, 0); + denom = nir_ssa_for_alu_src(bld, alu, 1); if (is_signed) { af = nir_i2f(bld, numer); @@ -96,7 +94,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) r = nir_imul(bld, q, b); r = nir_isub(bld, a, r); - r = nir_ige(bld, r, b); + r = nir_uge(bld, r, b); r = nir_b2i(bld, r); q = nir_iadd(bld, q, r); diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 00a31458310..5683e69d865 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_variable_mode mode = intrin->variables[0]->var->data.mode; - if (state->mode != -1 && state->mode != mode) + if (state->mode != nir_var_all && state->mode != mode) continue; if (mode != nir_var_shader_in && diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c index 8aaa48ab568..93ebf8e78a9 100644 --- a/src/glsl/nir/nir_lower_tex.c +++ b/src/glsl/nir/nir_lower_tex.c 
@@ -41,6 +41,7 @@ typedef struct { nir_builder b; const nir_lower_tex_options *options; + bool progress; } lower_tex_state; static void @@ -133,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) txs->op = nir_texop_txs; txs->sampler_dim = GLSL_SAMPLER_DIM_RECT; txs->sampler_index = tex->sampler_index; + txs->dest_type = nir_type_int; /* only single src, the lod: */ txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); @@ -213,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) } } +static nir_ssa_def * +get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + + if (swizzle_val == 4) { + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; + } else { + assert(swizzle_val == 5); + if (type == nir_type_float) + v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; + else + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; + } + + return nir_build_imm(b, 4, v); +} + +static void +swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) +{ + assert(tex->dest.is_ssa); + + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *swizzled; + if (tex->op == nir_texop_tg4) { + if (swizzle[tex->component] < 4) { + /* This one's easy */ + tex->component = swizzle[tex->component]; + return; + } else { + swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); + } + } else { + assert(nir_tex_instr_dest_size(tex) == 4); + if (swizzle[0] < 4 && swizzle[1] < 4 && + swizzle[2] < 4 && swizzle[3] < 4) { + unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; + /* We have no 0's or 1's, just emit a swizzling MOV */ + swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); + } else { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < 4; i++) { + if (swizzle[i] < 4) { + srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); + } else { + srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); + } + } + swizzled = nir_vec(b, srcs, 4); + } + } + + 
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), + swizzled->parent_instr); +} + static bool nir_lower_tex_block(nir_block *block, void *void_state) { @@ -239,15 +301,28 @@ nir_lower_tex_block(nir_block *block, void *void_state) /* If we are clamping any coords, we must lower projector first * as clamping happens *after* projection: */ - if (lower_txp || sat_mask) + if (lower_txp || sat_mask) { project_src(b, tex); + state->progress = true; + } if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && - state->options->lower_rect) + state->options->lower_rect) { lower_rect(b, tex); + state->progress = true; + } - if (sat_mask) + if (sat_mask) { saturate_src(b, tex, sat_mask); + state->progress = true; + } + + if (((1 << tex->sampler_index) & state->options->swizzle_result) && + !nir_tex_instr_is_query(tex) && + !(tex->is_shadow && tex->is_new_style_shadow)) { + swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]); + state->progress = true; + } } return true; @@ -264,13 +339,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state) nir_metadata_dominance); } -void +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) { lower_tex_state state; state.options = options; + state.progress = false; + nir_foreach_overload(shader, overload) { if (overload->impl) nir_lower_tex_impl(overload->impl, &state); } + + return state.progress; } diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index db519bf513b..6995b9d6bc1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot) exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ + return var; } diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c index 6de981f430f..d5324b35a78 100644 --- 
a/src/glsl/nir/nir_metadata.c +++ b/src/glsl/nir/nir_metadata.c @@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved) { impl->valid_metadata &= preserved; } + +#ifdef DEBUG +/** + * Make sure passes properly invalidate metadata (part 1). + * + * Call this before running a pass to set a bogus metadata flag, which will + * only be preserved if the pass forgets to call nir_metadata_preserve(). + */ +void +nir_metadata_set_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + overload->impl->valid_metadata |= nir_metadata_not_properly_reset; + } + } +} + +/** + * Make sure passes properly invalidate metadata (part 2). + * + * Call this after a pass makes progress to verify that the bogus metadata set by + * the earlier function was properly thrown away. Note that passes may not call + * nir_metadata_preserve() if they don't actually make any changes at all. + */ +void +nir_metadata_check_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + assert(!(overload->impl->valid_metadata & + nir_metadata_not_properly_reset)); + } + } +} +#endif diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 3c0f1da94af..37d3dfc4588 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -91,7 +91,7 @@ class Opcode(object): tfloat = "float" tint = "int" tbool = "bool" -tunsigned = "unsigned" +tuint = "uint" commutative = "commutative " associative = "associative " @@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion +unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. 
# Float-to-boolean conversion unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") @@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion unop_convert("i2b", tint, tbool, "src0 != 0") unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. +unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}") @@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f") # Floating point pack and unpack operations. def pack_2x16(fmt): - unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """ + unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ dst.x = (uint32_t) pack_fmt_1x16(src0.x); dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; """.replace("fmt", fmt)) def pack_4x8(fmt): - unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """ + unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ dst.x = (uint32_t) pack_fmt_1x8(src0.x); dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; @@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24; """.replace("fmt", fmt)) def unpack_2x16(fmt): - unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); """.replace("fmt", fmt)) def unpack_4x8(fmt): - unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); @@ -248,22 +248,22 @@ 
unpack_2x16("half") # Lowered floating point unpacking operations. -unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x & 0xffff))") -unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x >> 16))") # Bit operations, part of ARB_gpu_shader5. -unop("bitfield_reverse", tunsigned, """ +unop("bitfield_reverse", tuint, """ /* we're not winning any awards for speed here, but that's ok */ dst = 0; for (unsigned bit = 0; bit < 32; bit++) dst |= ((src0 >> bit) & 1) << (31 - bit); """) -unop("bit_count", tunsigned, """ +unop("bit_count", tuint, """ dst = 0; for (unsigned bit = 0; bit < 32; bit++) { if ((src0 >> bit) & 1) @@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tunsigned, tint, """ +unop_convert("ufind_msb", tuint, tint, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { @@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1") binop("imul_high", tint, commutative, "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") # high 32-bits of unsigned integer multiply -binop("umul_high", tunsigned, commutative, +binop("umul_high", tuint, commutative, "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") binop("fdiv", tfloat, "", "src0 / src1") binop("idiv", tint, "", "src0 / src1") -binop("udiv", tunsigned, "", "src0 / src1") +binop("udiv", tuint, "", "src0 / src1") # returns a boolean representing the carry resulting from the addition of # the two unsigned arguments. -binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0") +binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0") # returns a boolean representing the borrow resulting from the subtraction # of the two unsigned arguments. 
-binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0") +binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") -binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1") +binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") # # Comparisons @@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1") binop_compare("ige", tint, "", "src0 >= src1") binop_compare("ieq", tint, commutative, "src0 == src1") binop_compare("ine", tint, commutative, "src0 != src1") -binop_compare("ult", tunsigned, "", "src0 < src1") -binop_compare("uge", tunsigned, "", "src0 >= src1") +binop_compare("ult", tuint, "", "src0 < src1") +binop_compare("uge", tuint, "", "src0 >= src1") # integer-aware GLSL-style comparisons that compare floats and ints @@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E binop("ishl", tint, "", "src0 << src1") binop("ishr", tint, "", "src0 >> src1") -binop("ushr", tunsigned, "", "src0 >> src1") +binop("ushr", tuint, "", "src0 >> src1") # bitwise logic operators # @@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1") # integers. -binop("iand", tunsigned, commutative + associative, "src0 & src1") -binop("ior", tunsigned, commutative + associative, "src0 | src1") -binop("ixor", tunsigned, commutative + associative, "src0 ^ src1") +binop("iand", tuint, commutative + associative, "src0 & src1") +binop("ior", tuint, commutative + associative, "src0 | src1") +binop("ixor", tuint, commutative + associative, "src0 ^ src1") # floating point logic operators @@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", binop("fmin", tfloat, "", "fminf(src0, src1)") binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") -binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1") +binop("umin", tuint, commutative + associative, "src1 > src0 ? 
src0 : src1") binop("fmax", tfloat, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") -binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") +binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") # Saturated vector add for 4 8bit ints. binop("usadd_4x8", tint, commutative + associative, """ @@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) { binop("fpow", tfloat, "", "powf(src0, src1)") -binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, +binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") -binop_convert("bfm", tunsigned, tint, "", """ +binop_convert("bfm", tuint, tint, "", """ int offset = src0, bits = src1; if (offset < 0 || bits < 0 || offset + bits > 32) dst = 0; /* undefined per the spec */ @@ -535,7 +535,7 @@ if (!isnormal(dst)) # Combines the first component of each input to make a 2-component vector. -binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """ +binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ dst.x = src0.x; dst.y = src1.x; """) @@ -543,9 +543,9 @@ dst.y = src1.x; def triop(name, ty, const_expr): opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size], - [tunsigned, tunsigned, tunsigned], "", const_expr) + [tuint, tuint, tuint], "", const_expr) triop("ffma", tfloat, "src0 * src1 + src2") @@ -559,11 +559,11 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") -opcode("bcsel", 0, tunsigned, [0, 0, 0], - [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2") +opcode("bcsel", 0, tuint, [0, 0, 0], + [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") -triop("bfi", tunsigned, """ -unsigned mask = src0, insert = src1 & mask, base = src2; +triop("bfi", tuint, """ +unsigned mask = src0, insert = src1, base = src2; if (mask == 0) { dst = base; } else { @@ -572,12 +572,12 @@ if (mask == 0) { tmp >>= 1; insert <<= 1; } - dst = (base & ~mask) | insert; + dst = (base & ~mask) | (insert & mask); } """) -opcode("ubitfield_extract", 0, tunsigned, - [0, 1, 1], [tunsigned, tint, tint], "", """ +opcode("ubitfield_extract", 0, tuint, + [0, 1, 1], [tuint, tint, tint], "", """ unsigned base = src0; int offset = src1.x, bits = src2.x; if (bits == 0) { @@ -611,13 +611,13 @@ dst.z = src2.x; def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, src4_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size, src4_size], - [tunsigned, tunsigned, tunsigned, tunsigned], + [tuint, tuint, tuint, tuint], "", const_expr) -opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1], - [tunsigned, tunsigned, tint, tint], "", """ +opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1], + [tuint, tuint, tint, tint], "", """ unsigned base = src0, insert = src1; int offset = src2.x, bits = src3.x; if (bits == 0) { diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c index 7d8bdd7f2ca..cfc8e331128 100644 --- a/src/glsl/nir/nir_opt_copy_propagate.c +++ b/src/glsl/nir/nir_opt_copy_propagate.c @@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr) static bool is_vec(nir_alu_instr *instr) { - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { if (!instr->src[i].src.is_ssa) return false; + /* we handle modifiers in a separate pass */ + if (instr->src[i].abs || instr->src[i].negate) + return false; + } + return instr->op == nir_op_vec2 || instr->op == nir_op_vec3 || instr->op == nir_op_vec4; diff --git a/src/glsl/nir/nir_print.c 
b/src/glsl/nir/nir_print.c index 2db209d434d..76bfc47c2a0 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_texop_texture_samples: fprintf(fp, "texture_samples "); break; - + case nir_texop_samples_identical: + fprintf(fp, "samples_identical "); + break; default: unreachable("Invalid texture operation"); break; @@ -985,6 +987,16 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); + if (shader->info.name) + fprintf(fp, "name: %s\n", shader->info.name); + + if (shader->info.label) + fprintf(fp, "label: %s\n", shader->info.label); + + fprintf(fp, "inputs: %u\n", shader->num_inputs); + fprintf(fp, "outputs: %u\n", shader->num_outputs); + fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); } diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c index bb154407914..56d7e8162f3 100644 --- a/src/glsl/nir/nir_search.c +++ b/src/glsl/nir/nir_search.c @@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, } return true; case nir_type_int: - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: for (unsigned i = 0; i < num_components; ++i) { if (load->value.i[new_swizzle[i]] != const_val->data.i) @@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i); load->value.i[0] = c->data.i; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: load->value.u[0] = c->data.u; break; diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index ed374b921fa..06879d64ee2 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) { assert(instr->op < 
nir_num_opcodes); - validate_alu_dest(&instr->dest, state); - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { validate_alu_src(instr, i, state); } + + validate_alu_dest(&instr->dest, state); } static void @@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_src(&instr->src[i], state); } + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + validate_deref_var(instr, instr->variables[i], state); + } + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { unsigned components_written = nir_intrinsic_infos[instr->intrinsic].dest_components; @@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_dest(&instr->dest, state); } - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr, instr->variables[i], state); - } - switch (instr->intrinsic) { case nir_intrinsic_load_var: { const struct glsl_type *type = @@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) static void validate_tex_instr(nir_tex_instr *instr, validate_state *state) { - validate_dest(&instr->dest, state); - bool src_type_seen[nir_num_tex_src_types]; for (unsigned i = 0; i < nir_num_tex_src_types; i++) src_type_seen[i] = false; @@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) if (instr->sampler != NULL) validate_deref_var(instr, instr->sampler, state); + + validate_dest(&instr->dest, state); } static void diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 70610ca0f66..86282d25e0a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2026,7 +2026,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, switch (glsl_get_sampler_result_type(sampler_type)) { case GLSL_TYPE_FLOAT: instr->dest_type = 
nir_type_float; break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; default: unreachable("Invalid base type for sampler result"); diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp index e38a0e93058..cd58213c019 100644 --- a/src/glsl/opt_tree_grafting.cpp +++ b/src/glsl/opt_tree_grafting.cpp @@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (do_graft(&ir->lod_info.bias)) diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index e64955e3b3e..00925455b07 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -133,6 +133,8 @@ if HAVE_DRI3 libglx_la_SOURCES += \ dri3_glx.c \ dri3_priv.h + +libglx_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la endif if HAVE_APPLEDRI diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index 96f13e6a07b..ee243126731 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -78,40 +78,109 @@ #include "loader.h" #include "dri2.h" -static const struct glx_context_vtable dri3_context_vtable; +static struct dri3_drawable * +loader_drawable_to_dri3_drawable(struct loader_dri3_drawable *draw) { + size_t offset = offsetof(struct dri3_drawable, loader_drawable); + return (struct dri3_drawable *)(((void*) draw) - offset); +} -static inline void -dri3_fence_reset(xcb_connection_t *c, struct dri3_buffer *buffer) +static int +glx_dri3_get_swap_interval(struct loader_dri3_drawable *draw) { - xshmfence_reset(buffer->shm_fence); + struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw); + + return priv->swap_interval; } -static inline void -dri3_fence_set(struct dri3_buffer *buffer) +static int +glx_dri3_clamp_swap_interval(struct 
loader_dri3_drawable *draw, int interval) { - xshmfence_trigger(buffer->shm_fence); + return interval; } -static inline void -dri3_fence_trigger(xcb_connection_t *c, struct dri3_buffer *buffer) +static void +glx_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) { - xcb_sync_trigger_fence(c, buffer->sync_fence); + struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw); + + priv->swap_interval = interval; } -static inline void -dri3_fence_await(xcb_connection_t *c, struct dri3_buffer *buffer) +static void +glx_dri3_set_drawable_size(struct loader_dri3_drawable *draw, + int width, int height) { - xcb_flush(c); - xshmfence_await(buffer->shm_fence); + /* Nothing to do */ } -static inline Bool -dri3_fence_triggered(struct dri3_buffer *buffer) +static bool +glx_dri3_in_current_context(struct loader_dri3_drawable *draw) { - return xshmfence_query(buffer->shm_fence); + struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw); + struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext(); + struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; + + return (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base; +} + +static __DRIcontext * +glx_dri3_get_dri_context(struct loader_dri3_drawable *draw) +{ + struct glx_context *gc = __glXGetCurrentContext(); + + if (gc) { + struct dri3_context *dri3Ctx = (struct dri3_context *) gc; + return dri3Ctx->driContext; + } + + return NULL; } static void +glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags) +{ + loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER); +} + +static void +glx_dri3_show_fps(struct loader_dri3_drawable *draw, uint64_t current_ust) +{ + struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw); + const uint64_t interval = + ((struct dri3_screen *) priv->base.psc)->show_fps_interval; + + if (!interval) + return; + + priv->frames++; + + /* DRI3+Present together uses microseconds for UST. 
*/ + if (priv->previous_ust + interval * 1000000 <= current_ust) { + if (priv->previous_ust) { + fprintf(stderr, "libGL: FPS = %.1f\n", + ((uint64_t) priv->frames * 1000000) / + (double)(current_ust - priv->previous_ust)); + } + priv->frames = 0; + priv->previous_ust = current_ust; + } +} + +static struct loader_dri3_vtable glx_dri3_vtable = { + .get_swap_interval = glx_dri3_get_swap_interval, + .clamp_swap_interval = glx_dri3_clamp_swap_interval, + .set_swap_interval = glx_dri3_set_swap_interval, + .set_drawable_size = glx_dri3_set_drawable_size, + .in_current_context = glx_dri3_in_current_context, + .get_dri_context = glx_dri3_get_dri_context, + .flush_drawable = glx_dri3_flush_drawable, + .show_fps = glx_dri3_show_fps, +}; + + +static const struct glx_context_vtable dri3_context_vtable; + +static void dri3_destroy_context(struct glx_context *context) { struct dri3_context *pcp = (struct dri3_context *) context; @@ -143,7 +212,8 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old, return GLXBadDrawable; if (!(*psc->core->bindContext) (pcp->driContext, - pdraw->driDrawable, pread->driDrawable)) + pdraw->loader_drawable.dri_drawable, + pread->loader_drawable.dri_drawable)) return GLXBadContext; return Success; @@ -265,38 +335,12 @@ dri3_create_context(struct glx_screen *base, } static void -dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer); - -static void -dri3_update_num_back(struct dri3_drawable *priv) -{ - priv->num_back = 1; - if (priv->flipping) { - if (!priv->is_pixmap && !(priv->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC)) - priv->num_back++; - priv->num_back++; - } - if (priv->swap_interval == 0) - priv->num_back++; -} - -static void dri3_destroy_drawable(__GLXDRIdrawable *base) { - struct dri3_screen *psc = (struct dri3_screen *) base->psc; struct dri3_drawable *pdraw = (struct dri3_drawable *) base; - xcb_connection_t *c = XGetXCBConnection(pdraw->base.psc->dpy); - int i; - - 
(*psc->core->destroyDrawable) (pdraw->driDrawable); - for (i = 0; i < DRI3_NUM_BUFFERS; i++) { - if (pdraw->buffers[i]) - dri3_free_render_buffer(pdraw, pdraw->buffers[i]); - } + loader_dri3_drawable_fini(&pdraw->loader_drawable); - if (pdraw->special_event) - xcb_unregister_for_special_event(c, pdraw->special_event); free(pdraw); } @@ -307,7 +351,6 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable, struct dri3_drawable *pdraw; struct dri3_screen *psc = (struct dri3_screen *) base; __GLXDRIconfigPrivate *config = (__GLXDRIconfigPrivate *) config_base; - GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1; pdraw = calloc(1, sizeof(*pdraw)); if (!pdraw) @@ -317,158 +360,21 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable, pdraw->base.xDrawable = xDrawable; pdraw->base.drawable = drawable; pdraw->base.psc = &psc->base; - pdraw->swap_interval = 1; /* default may be overridden below */ - pdraw->have_back = 0; - pdraw->have_fake_front = 0; - - if (psc->config) - psc->config->configQueryi(psc->driScreen, - "vblank_mode", &vblank_mode); - - switch (vblank_mode) { - case DRI_CONF_VBLANK_NEVER: - case DRI_CONF_VBLANK_DEF_INTERVAL_0: - pdraw->swap_interval = 0; - break; - case DRI_CONF_VBLANK_DEF_INTERVAL_1: - case DRI_CONF_VBLANK_ALWAYS_SYNC: - default: - pdraw->swap_interval = 1; - break; - } - - dri3_update_num_back(pdraw); (void) __glXInitialize(psc->base.dpy); - /* Create a new drawable */ - pdraw->driDrawable = - (*psc->image_driver->createNewDrawable) (psc->driScreen, - config->driConfig, pdraw); - - if (!pdraw->driDrawable) { + if (loader_dri3_drawable_init(XGetXCBConnection(base->dpy), + xDrawable, psc->driScreen, + psc->is_different_gpu, config->driConfig, + &psc->loader_dri3_ext, &glx_dri3_vtable, + &pdraw->loader_drawable)) { free(pdraw); return NULL; } - /* - * Make sure server has the same swap interval we do for the new - * drawable. 
- */ - if (psc->vtable.setSwapInterval) - psc->vtable.setSwapInterval(&pdraw->base, pdraw->swap_interval); - return &pdraw->base; } -static void -show_fps(struct dri3_drawable *draw, uint64_t current_ust) -{ - const uint64_t interval = - ((struct dri3_screen *) draw->base.psc)->show_fps_interval; - - draw->frames++; - - /* DRI3+Present together uses microseconds for UST. */ - if (draw->previous_ust + interval * 1000000 <= current_ust) { - if (draw->previous_ust) { - fprintf(stderr, "libGL: FPS = %.1f\n", - ((uint64_t) draw->frames * 1000000) / - (double)(current_ust - draw->previous_ust)); - } - draw->frames = 0; - draw->previous_ust = current_ust; - } -} - -/* - * Process one Present event - */ -static void -dri3_handle_present_event(struct dri3_drawable *priv, xcb_present_generic_event_t *ge) -{ - struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; - - switch (ge->evtype) { - case XCB_PRESENT_CONFIGURE_NOTIFY: { - xcb_present_configure_notify_event_t *ce = (void *) ge; - - priv->width = ce->width; - priv->height = ce->height; - break; - } - case XCB_PRESENT_COMPLETE_NOTIFY: { - xcb_present_complete_notify_event_t *ce = (void *) ge; - - /* Compute the processed SBC number from the received 32-bit serial number merged - * with the upper 32-bits of the sent 64-bit serial number while checking for - * wrap - */ - if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) { - priv->recv_sbc = (priv->send_sbc & 0xffffffff00000000LL) | ce->serial; - if (priv->recv_sbc > priv->send_sbc) - priv->recv_sbc -= 0x100000000; - switch (ce->mode) { - case XCB_PRESENT_COMPLETE_MODE_FLIP: - priv->flipping = true; - break; - case XCB_PRESENT_COMPLETE_MODE_COPY: - priv->flipping = false; - break; - } - dri3_update_num_back(priv); - - if (psc->show_fps_interval) - show_fps(priv, ce->ust); - - priv->ust = ce->ust; - priv->msc = ce->msc; - } else { - priv->recv_msc_serial = ce->serial; - priv->notify_ust = ce->ust; - priv->notify_msc = ce->msc; - } - break; - } - case 
XCB_PRESENT_EVENT_IDLE_NOTIFY: { - xcb_present_idle_notify_event_t *ie = (void *) ge; - int b; - - for (b = 0; b < sizeof (priv->buffers) / sizeof (priv->buffers[0]); b++) { - struct dri3_buffer *buf = priv->buffers[b]; - - if (buf && buf->pixmap == ie->pixmap) { - buf->busy = 0; - if (priv->num_back <= b && b < DRI3_MAX_BACK) { - dri3_free_render_buffer(priv, buf); - priv->buffers[b] = NULL; - } - break; - } - } - break; - } - } - free(ge); -} - -static bool -dri3_wait_for_event(__GLXDRIdrawable *pdraw) -{ - xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy); - struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; - xcb_generic_event_t *ev; - xcb_present_generic_event_t *ge; - - xcb_flush(c); - ev = xcb_wait_for_special_event(c, priv->special_event); - if (!ev) - return false; - ge = (void *) ev; - dri3_handle_present_event(priv, ge); - return true; -} - /** dri3_wait_for_msc * * Get the X server to send an event when the target msc/divisor/remainder is @@ -478,32 +384,10 @@ static int dri3_wait_for_msc(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc) { - xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy); struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; - uint32_t msc_serial; - - /* Ask for the an event for the target MSC */ - msc_serial = ++priv->send_msc_serial; - xcb_present_notify_msc(c, - priv->base.xDrawable, - msc_serial, - target_msc, - divisor, - remainder); - - xcb_flush(c); - - /* Wait for the event */ - if (priv->special_event) { - while ((int32_t) (msc_serial - priv->recv_msc_serial) > 0) { - if (!dri3_wait_for_event(pdraw)) - return 0; - } - } - *ust = priv->notify_ust; - *msc = priv->notify_msc; - *sbc = priv->recv_sbc; + loader_dri3_wait_for_msc(&priv->loader_drawable, target_msc, divisor, + remainder, ust, msc, sbc); return 1; } @@ -532,101 +416,8 @@ dri3_wait_for_sbc(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust, { struct 
dri3_drawable *priv = (struct dri3_drawable *) pdraw; - /* From the GLX_OML_sync_control spec: - * - * "If <target_sbc> = 0, the function will block until all previous - * swaps requested with glXSwapBuffersMscOML for that window have - * completed." - */ - if (!target_sbc) - target_sbc = priv->send_sbc; - - while (priv->recv_sbc < target_sbc) { - if (!dri3_wait_for_event(pdraw)) - return 0; - } - - *ust = priv->ust; - *msc = priv->msc; - *sbc = priv->recv_sbc; - return 1; -} - -/** - * Asks the driver to flush any queued work necessary for serializing with the - * X command stream, and optionally the slightly more strict requirement of - * glFlush() equivalence (which would require flushing even if nothing had - * been drawn to a window system framebuffer, for example). - */ -static void -dri3_flush(struct dri3_screen *psc, - struct dri3_drawable *draw, - unsigned flags, - enum __DRI2throttleReason throttle_reason) -{ - struct glx_context *gc = __glXGetCurrentContext(); - - if (gc) { - struct dri3_context *dri3Ctx = (struct dri3_context *)gc; - - (*psc->f->flush_with_flags)(dri3Ctx->driContext, draw->driDrawable, flags, throttle_reason); - } -} - -static xcb_gcontext_t -dri3_drawable_gc(struct dri3_drawable *priv) -{ - if (!priv->gc) { - uint32_t v; - xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - - v = 0; - xcb_create_gc(c, - (priv->gc = xcb_generate_id(c)), - priv->base.xDrawable, - XCB_GC_GRAPHICS_EXPOSURES, - &v); - } - return priv->gc; -} - -static struct dri3_buffer * -dri3_back_buffer(struct dri3_drawable *priv) -{ - return priv->buffers[DRI3_BACK_ID(priv->cur_back)]; -} - -static struct dri3_buffer * -dri3_fake_front_buffer(struct dri3_drawable *priv) -{ - return priv->buffers[DRI3_FRONT_ID]; -} - -static void -dri3_copy_area (xcb_connection_t *c /**< */, - xcb_drawable_t src_drawable /**< */, - xcb_drawable_t dst_drawable /**< */, - xcb_gcontext_t gc /**< */, - int16_t src_x /**< */, - int16_t src_y /**< */, - int16_t dst_x /**< */, - 
int16_t dst_y /**< */, - uint16_t width /**< */, - uint16_t height /**< */) -{ - xcb_void_cookie_t cookie; - - cookie = xcb_copy_area_checked(c, - src_drawable, - dst_drawable, - gc, - src_x, - src_y, - dst_x, - dst_y, - width, - height); - xcb_discard_reply(c, cookie.sequence); + return loader_dri3_wait_for_sbc(&priv->loader_drawable, target_sbc, + ust, msc, sbc); } static void @@ -635,144 +426,27 @@ dri3_copy_sub_buffer(__GLXDRIdrawable *pdraw, int x, int y, Bool flush) { struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; - struct dri3_screen *psc = (struct dri3_screen *) pdraw->psc; - struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext(); - xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - struct dri3_buffer *back; - - unsigned flags = __DRI2_FLUSH_DRAWABLE; - - /* Check we have the right attachments */ - if (!priv->have_back || priv->is_pixmap) - return; - - if (flush) - flags |= __DRI2_FLUSH_CONTEXT; - dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER); - - back = dri3_back_buffer(priv); - y = priv->height - y - height; - - if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) { - /* Update the linear buffer part of the back buffer - * for the dri3_copy_area operation - */ - psc->image->blitImage(pcp->driContext, - back->linear_buffer, - back->image, - 0, 0, back->width, - back->height, - 0, 0, back->width, - back->height, __BLIT_FLAG_FLUSH); - /* We use blitImage to update our fake front, - */ - if (priv->have_fake_front) - psc->image->blitImage(pcp->driContext, - dri3_fake_front_buffer(priv)->image, - back->image, - x, y, width, height, - x, y, width, height, __BLIT_FLAG_FLUSH); - } - - dri3_fence_reset(c, back); - dri3_copy_area(c, - dri3_back_buffer(priv)->pixmap, - priv->base.xDrawable, - dri3_drawable_gc(priv), - x, y, x, y, width, height); - dri3_fence_trigger(c, back); - /* Refresh the fake front (if present) after we just damaged the real - * front. 
- */ - if (priv->have_fake_front && !psc->is_different_gpu) { - dri3_fence_reset(c, dri3_fake_front_buffer(priv)); - dri3_copy_area(c, - dri3_back_buffer(priv)->pixmap, - dri3_fake_front_buffer(priv)->pixmap, - dri3_drawable_gc(priv), - x, y, x, y, width, height); - dri3_fence_trigger(c, dri3_fake_front_buffer(priv)); - dri3_fence_await(c, dri3_fake_front_buffer(priv)); - } - dri3_fence_await(c, back); -} - -static void -dri3_copy_drawable(struct dri3_drawable *priv, Drawable dest, Drawable src) -{ - struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; - xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - dri3_flush(psc, priv, __DRI2_FLUSH_DRAWABLE, 0); - - dri3_fence_reset(c, dri3_fake_front_buffer(priv)); - dri3_copy_area(c, - src, dest, - dri3_drawable_gc(priv), - 0, 0, 0, 0, priv->width, priv->height); - dri3_fence_trigger(c, dri3_fake_front_buffer(priv)); - dri3_fence_await(c, dri3_fake_front_buffer(priv)); + loader_dri3_copy_sub_buffer(&priv->loader_drawable, x, y, + width, height, flush); } static void dri3_wait_x(struct glx_context *gc) { - struct dri3_context *pcp = (struct dri3_context *) gc; struct dri3_drawable *priv = (struct dri3_drawable *) GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable); - struct dri3_screen *psc; - struct dri3_buffer *front; - if (priv == NULL || !priv->have_fake_front) - return; - - psc = (struct dri3_screen *) priv->base.psc; - front = dri3_fake_front_buffer(priv); - - dri3_copy_drawable(priv, front->pixmap, priv->base.xDrawable); - - /* In the psc->is_different_gpu case, the linear buffer has been updated, - * but not yet the tiled buffer. - * Copy back to the tiled buffer we use for rendering. - * Note that we don't need flushing. 
- */ - if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) - psc->image->blitImage(pcp->driContext, - front->image, - front->linear_buffer, - 0, 0, front->width, - front->height, - 0, 0, front->width, - front->height, 0); + loader_dri3_wait_x(&priv->loader_drawable); } static void dri3_wait_gl(struct glx_context *gc) { - struct dri3_context *pcp = (struct dri3_context *) gc; struct dri3_drawable *priv = (struct dri3_drawable *) GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable); - struct dri3_screen *psc; - struct dri3_buffer *front; - - if (priv == NULL || !priv->have_fake_front) - return; - - psc = (struct dri3_screen *) priv->base.psc; - front = dri3_fake_front_buffer(priv); - /* In the psc->is_different_gpu case, we update the linear_buffer - * before updating the real front. - */ - if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) - psc->image->blitImage(pcp->driContext, - front->linear_buffer, - front->image, - 0, 0, front->width, - front->height, - 0, 0, front->width, - front->height, __BLIT_FLAG_FLUSH); - dri3_copy_drawable(priv, priv->base.xDrawable, front->pixmap); + loader_dri3_wait_gl(&priv->loader_drawable); } /** @@ -782,8 +456,8 @@ dri3_wait_gl(struct glx_context *gc) static void dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) { - struct glx_context *gc; - struct dri3_drawable *pdraw = loaderPrivate; + struct loader_dri3_drawable *draw = loaderPrivate; + struct dri3_drawable *pdraw = loader_drawable_to_dri3_drawable(draw); struct dri3_screen *psc; if (!pdraw) @@ -796,699 +470,9 @@ dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) (void) __glXInitialize(psc->base.dpy); - gc = __glXGetCurrentContext(); + loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT); - dri3_flush(psc, pdraw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT); - - dri3_wait_gl(gc); -} - -static uint32_t 
-dri3_cpp_for_format(uint32_t format) { - switch (format) { - case __DRI_IMAGE_FORMAT_R8: - return 1; - case __DRI_IMAGE_FORMAT_RGB565: - case __DRI_IMAGE_FORMAT_GR88: - return 2; - case __DRI_IMAGE_FORMAT_XRGB8888: - case __DRI_IMAGE_FORMAT_ARGB8888: - case __DRI_IMAGE_FORMAT_ABGR8888: - case __DRI_IMAGE_FORMAT_XBGR8888: - case __DRI_IMAGE_FORMAT_XRGB2101010: - case __DRI_IMAGE_FORMAT_ARGB2101010: - case __DRI_IMAGE_FORMAT_SARGB8: - return 4; - case __DRI_IMAGE_FORMAT_NONE: - default: - return 0; - } -} - - -/** dri3_alloc_render_buffer - * - * Use the driver createImage function to construct a __DRIimage, then - * get a file descriptor for that and create an X pixmap from that - * - * Allocate an xshmfence for synchronization - */ -static struct dri3_buffer * -dri3_alloc_render_buffer(struct glx_screen *glx_screen, Drawable draw, - unsigned int format, int width, int height, int depth) -{ - struct dri3_screen *psc = (struct dri3_screen *) glx_screen; - Display *dpy = glx_screen->dpy; - struct dri3_buffer *buffer; - __DRIimage *pixmap_buffer; - xcb_connection_t *c = XGetXCBConnection(dpy); - xcb_pixmap_t pixmap; - xcb_sync_fence_t sync_fence; - struct xshmfence *shm_fence; - int buffer_fd, fence_fd; - int stride; - - /* Create an xshmfence object and - * prepare to send that to the X server - */ - - fence_fd = xshmfence_alloc_shm(); - if (fence_fd < 0) { - ErrorMessageF("DRI3 Fence object allocation failure %s\n", strerror(errno)); - return NULL; - } - shm_fence = xshmfence_map_shm(fence_fd); - if (shm_fence == NULL) { - ErrorMessageF("DRI3 Fence object map failure %s\n", strerror(errno)); - goto no_shm_fence; - } - - /* Allocate the image from the driver - */ - buffer = calloc(1, sizeof (struct dri3_buffer)); - if (!buffer) - goto no_buffer; - - buffer->cpp = dri3_cpp_for_format(format); - if (!buffer->cpp) { - ErrorMessageF("DRI3 buffer format %d invalid\n", format); - goto no_image; - } - - if (!psc->is_different_gpu) { - buffer->image = 
(*psc->image->createImage) (psc->driScreen, - width, height, - format, - __DRI_IMAGE_USE_SHARE | - __DRI_IMAGE_USE_SCANOUT, - buffer); - pixmap_buffer = buffer->image; - - if (!buffer->image) { - ErrorMessageF("DRI3 gpu image creation failure\n"); - goto no_image; - } - } else { - buffer->image = (*psc->image->createImage) (psc->driScreen, - width, height, - format, - 0, - buffer); - - if (!buffer->image) { - ErrorMessageF("DRI3 other gpu image creation failure\n"); - goto no_image; - } - - buffer->linear_buffer = (*psc->image->createImage) (psc->driScreen, - width, height, - format, - __DRI_IMAGE_USE_SHARE | - __DRI_IMAGE_USE_LINEAR, - buffer); - pixmap_buffer = buffer->linear_buffer; - - if (!buffer->linear_buffer) { - ErrorMessageF("DRI3 gpu linear image creation failure\n"); - goto no_linear_buffer; - } - } - - /* X wants the stride, so ask the image for it - */ - if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE, &stride)) { - ErrorMessageF("DRI3 get image stride failed\n"); - goto no_buffer_attrib; - } - - buffer->pitch = stride; - - if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD, &buffer_fd)) { - ErrorMessageF("DRI3 get image FD failed\n"); - goto no_buffer_attrib; - } - - xcb_dri3_pixmap_from_buffer(c, - (pixmap = xcb_generate_id(c)), - draw, - buffer->size, - width, height, buffer->pitch, - depth, buffer->cpp * 8, - buffer_fd); - - xcb_dri3_fence_from_fd(c, - pixmap, - (sync_fence = xcb_generate_id(c)), - false, - fence_fd); - - buffer->pixmap = pixmap; - buffer->own_pixmap = true; - buffer->sync_fence = sync_fence; - buffer->shm_fence = shm_fence; - buffer->width = width; - buffer->height = height; - - /* Mark the buffer as idle - */ - dri3_fence_set(buffer); - - return buffer; - -no_buffer_attrib: - (*psc->image->destroyImage)(pixmap_buffer); -no_linear_buffer: - if (psc->is_different_gpu) - (*psc->image->destroyImage)(buffer->image); -no_image: - free(buffer); -no_buffer: - xshmfence_unmap_shm(shm_fence); 
-no_shm_fence: - close(fence_fd); - ErrorMessageF("DRI3 alloc_render_buffer failed\n"); - return NULL; -} - -/** dri3_free_render_buffer - * - * Free everything associated with one render buffer including pixmap, fence - * stuff and the driver image - */ -static void -dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer) -{ - struct dri3_screen *psc = (struct dri3_screen *) pdraw->base.psc; - xcb_connection_t *c = XGetXCBConnection(pdraw->base.psc->dpy); - - if (buffer->own_pixmap) - xcb_free_pixmap(c, buffer->pixmap); - xcb_sync_destroy_fence(c, buffer->sync_fence); - xshmfence_unmap_shm(buffer->shm_fence); - (*psc->image->destroyImage)(buffer->image); - if (buffer->linear_buffer) - (*psc->image->destroyImage)(buffer->linear_buffer); - free(buffer); -} - - -/** dri3_flush_present_events - * - * Process any present events that have been received from the X server - */ -static void -dri3_flush_present_events(struct dri3_drawable *priv) -{ - xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - - /* Check to see if any configuration changes have occurred - * since we were last invoked - */ - if (priv->special_event) { - xcb_generic_event_t *ev; - - while ((ev = xcb_poll_for_special_event(c, priv->special_event)) != NULL) { - xcb_present_generic_event_t *ge = (void *) ev; - dri3_handle_present_event(priv, ge); - } - } -} - -/** dri3_update_drawable - * - * Called the first time we use the drawable and then - * after we receive present configure notify events to - * track the geometry of the drawable - */ -static int -dri3_update_drawable(__DRIdrawable *driDrawable, void *loaderPrivate) -{ - struct dri3_drawable *priv = loaderPrivate; - xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - - /* First time through, go get the current drawable geometry - */ - if (priv->width == 0 || priv->height == 0 || priv->depth == 0) { - xcb_get_geometry_cookie_t geom_cookie; - xcb_get_geometry_reply_t *geom_reply; - xcb_void_cookie_t 
cookie; - xcb_generic_error_t *error; - xcb_present_query_capabilities_cookie_t present_capabilities_cookie; - xcb_present_query_capabilities_reply_t *present_capabilities_reply; - - - /* Try to select for input on the window. - * - * If the drawable is a window, this will get our events - * delivered. - * - * Otherwise, we'll get a BadWindow error back from this request which - * will let us know that the drawable is a pixmap instead. - */ - - - cookie = xcb_present_select_input_checked(c, - (priv->eid = xcb_generate_id(c)), - priv->base.xDrawable, - XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY| - XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY| - XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); - - present_capabilities_cookie = xcb_present_query_capabilities(c, priv->base.xDrawable); - - /* Create an XCB event queue to hold present events outside of the usual - * application event queue - */ - priv->special_event = xcb_register_for_special_xge(c, - &xcb_present_id, - priv->eid, - priv->stamp); - - geom_cookie = xcb_get_geometry(c, priv->base.xDrawable); - - geom_reply = xcb_get_geometry_reply(c, geom_cookie, NULL); - - if (!geom_reply) - return false; - - priv->width = geom_reply->width; - priv->height = geom_reply->height; - priv->depth = geom_reply->depth; - priv->is_pixmap = false; - - free(geom_reply); - - /* Check to see if our select input call failed. If it failed with a - * BadWindow error, then assume the drawable is a pixmap. 
Destroy the - * special event queue created above and mark the drawable as a pixmap - */ - - error = xcb_request_check(c, cookie); - - present_capabilities_reply = xcb_present_query_capabilities_reply(c, - present_capabilities_cookie, - NULL); - - if (present_capabilities_reply) { - priv->present_capabilities = present_capabilities_reply->capabilities; - free(present_capabilities_reply); - } else - priv->present_capabilities = 0; - - if (error) { - if (error->error_code != BadWindow) { - free(error); - return false; - } - priv->is_pixmap = true; - xcb_unregister_for_special_event(c, priv->special_event); - priv->special_event = NULL; - } - } - dri3_flush_present_events(priv); - return true; -} - -/* the DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while - * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid - * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and - * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds - */ -static int -image_format_to_fourcc(int format) -{ - - /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */ - switch (format) { - case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888; - case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565; - case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888; - case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888; - case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888; - case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888; - } - return 0; -} - -/** dri3_get_pixmap_buffer - * - * Get the DRM object for a pixmap from the X server and - * wrap that with a __DRIimage structure using createImageFromFds - */ -static struct dri3_buffer * -dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, - unsigned int format, - enum dri3_buffer_type buffer_type, - void *loaderPrivate) -{ - struct dri3_drawable *pdraw = loaderPrivate; - int buf_id = 
dri3_pixmap_buf_id(buffer_type); - struct dri3_buffer *buffer = pdraw->buffers[buf_id]; - Pixmap pixmap; - xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; - xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; - int *fds; - Display *dpy; - struct dri3_screen *psc; - xcb_connection_t *c; - xcb_sync_fence_t sync_fence; - struct xshmfence *shm_fence; - int fence_fd; - __DRIimage *image_planar; - int stride, offset; - - if (buffer) - return buffer; - - pixmap = pdraw->base.xDrawable; - psc = (struct dri3_screen *) pdraw->base.psc; - dpy = psc->base.dpy; - c = XGetXCBConnection(dpy); - - buffer = calloc(1, sizeof (struct dri3_buffer)); - if (!buffer) - goto no_buffer; - - fence_fd = xshmfence_alloc_shm(); - if (fence_fd < 0) - goto no_fence; - shm_fence = xshmfence_map_shm(fence_fd); - if (shm_fence == NULL) { - close (fence_fd); - goto no_fence; - } - - xcb_dri3_fence_from_fd(c, - pixmap, - (sync_fence = xcb_generate_id(c)), - false, - fence_fd); - - /* Get an FD for the pixmap object - */ - bp_cookie = xcb_dri3_buffer_from_pixmap(c, pixmap); - bp_reply = xcb_dri3_buffer_from_pixmap_reply(c, bp_cookie, NULL); - if (!bp_reply) - goto no_image; - fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply); - - stride = bp_reply->stride; - offset = 0; - - /* createImageFromFds creates a wrapper __DRIimage structure which - * can deal with multiple planes for things like Yuv images. So, once - * we've gotten the planar wrapper, pull the single plane out of it and - * discard the wrapper. 
- */ - image_planar = (*psc->image->createImageFromFds) (psc->driScreen, - bp_reply->width, - bp_reply->height, - image_format_to_fourcc(format), - fds, 1, - &stride, &offset, buffer); - close(fds[0]); - if (!image_planar) - goto no_image; - - buffer->image = (*psc->image->fromPlanar)(image_planar, 0, buffer); - - (*psc->image->destroyImage)(image_planar); - - if (!buffer->image) - goto no_image; - - buffer->pixmap = pixmap; - buffer->own_pixmap = false; - buffer->width = bp_reply->width; - buffer->height = bp_reply->height; - buffer->buffer_type = buffer_type; - buffer->shm_fence = shm_fence; - buffer->sync_fence = sync_fence; - - pdraw->buffers[buf_id] = buffer; - return buffer; - -no_image: - xcb_sync_destroy_fence(c, sync_fence); - xshmfence_unmap_shm(shm_fence); -no_fence: - free(buffer); -no_buffer: - return NULL; -} - -/** dri3_find_back - * - * Find an idle back buffer. If there isn't one, then - * wait for a present idle notify event from the X server - */ -static int -dri3_find_back(xcb_connection_t *c, struct dri3_drawable *priv) -{ - int b; - xcb_generic_event_t *ev; - xcb_present_generic_event_t *ge; - - for (;;) { - for (b = 0; b < priv->num_back; b++) { - int id = DRI3_BACK_ID((b + priv->cur_back) % priv->num_back); - struct dri3_buffer *buffer = priv->buffers[id]; - - if (!buffer || !buffer->busy) { - priv->cur_back = id; - return id; - } - } - xcb_flush(c); - ev = xcb_wait_for_special_event(c, priv->special_event); - if (!ev) - return -1; - ge = (void *) ev; - dri3_handle_present_event(priv, ge); - } -} - -/** dri3_get_buffer - * - * Find a front or back buffer, allocating new ones as necessary - */ -static struct dri3_buffer * -dri3_get_buffer(__DRIdrawable *driDrawable, - unsigned int format, - enum dri3_buffer_type buffer_type, - void *loaderPrivate) -{ - struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext(); - struct dri3_drawable *priv = loaderPrivate; - struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; - 
xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy); - struct dri3_buffer *buffer; - int buf_id; - - if (buffer_type == dri3_buffer_back) { - buf_id = dri3_find_back(c, priv); - - if (buf_id < 0) - return NULL; - } else { - buf_id = DRI3_FRONT_ID; - } - - buffer = priv->buffers[buf_id]; - - /* Allocate a new buffer if there isn't an old one, or if that - * old one is the wrong size - */ - if (!buffer || buffer->width != priv->width || buffer->height != priv->height) { - struct dri3_buffer *new_buffer; - - /* Allocate the new buffers - */ - new_buffer = dri3_alloc_render_buffer(priv->base.psc, - priv->base.xDrawable, - format, priv->width, priv->height, priv->depth); - if (!new_buffer) - return NULL; - - /* When resizing, copy the contents of the old buffer, waiting for that - * copy to complete using our fences before proceeding - */ - switch (buffer_type) { - case dri3_buffer_back: - if (buffer) { - if (!buffer->linear_buffer) { - dri3_fence_reset(c, new_buffer); - dri3_fence_await(c, buffer); - dri3_copy_area(c, - buffer->pixmap, - new_buffer->pixmap, - dri3_drawable_gc(priv), - 0, 0, 0, 0, priv->width, priv->height); - dri3_fence_trigger(c, new_buffer); - } else if ((&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) { - psc->image->blitImage(pcp->driContext, - new_buffer->image, - buffer->image, - 0, 0, priv->width, - priv->height, - 0, 0, priv->width, - priv->height, 0); - } - dri3_free_render_buffer(priv, buffer); - } - break; - case dri3_buffer_front: - dri3_fence_reset(c, new_buffer); - dri3_copy_area(c, - priv->base.xDrawable, - new_buffer->pixmap, - dri3_drawable_gc(priv), - 0, 0, 0, 0, priv->width, priv->height); - dri3_fence_trigger(c, new_buffer); - - if (new_buffer->linear_buffer && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) { - dri3_fence_await(c, new_buffer); - psc->image->blitImage(pcp->driContext, - new_buffer->image, - new_buffer->linear_buffer, - 0, 0, priv->width, - priv->height, - 0, 0, priv->width, - 
priv->height, 0); - } - break; - } - buffer = new_buffer; - buffer->buffer_type = buffer_type; - priv->buffers[buf_id] = buffer; - } - dri3_fence_await(c, buffer); - - /* Return the requested buffer */ - return buffer; -} - -/** dri3_free_buffers - * - * Free the front bufffer or all of the back buffers. Used - * when the application changes which buffers it needs - */ -static void -dri3_free_buffers(__DRIdrawable *driDrawable, - enum dri3_buffer_type buffer_type, - void *loaderPrivate) -{ - struct dri3_drawable *priv = loaderPrivate; - struct dri3_buffer *buffer; - int first_id; - int n_id; - int buf_id; - - switch (buffer_type) { - case dri3_buffer_back: - first_id = DRI3_BACK_ID(0); - n_id = DRI3_MAX_BACK; - break; - case dri3_buffer_front: - first_id = DRI3_FRONT_ID; - n_id = 1; - } - - for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) { - buffer = priv->buffers[buf_id]; - if (buffer) { - dri3_free_render_buffer(priv, buffer); - priv->buffers[buf_id] = NULL; - } - } -} - -/** dri3_get_buffers - * - * The published buffer allocation API. - * Returns all of the necessary buffers, allocating - * as needed. - */ -static int -dri3_get_buffers(__DRIdrawable *driDrawable, - unsigned int format, - uint32_t *stamp, - void *loaderPrivate, - uint32_t buffer_mask, - struct __DRIimageList *buffers) -{ - struct dri3_drawable *priv = loaderPrivate; - struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; - struct dri3_buffer *front, *back; - - buffers->image_mask = 0; - buffers->front = NULL; - buffers->back = NULL; - - front = NULL; - back = NULL; - - if (!dri3_update_drawable(driDrawable, loaderPrivate)) - return false; - - /* pixmaps always have front buffers */ - if (priv->is_pixmap) - buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; - - if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) { - /* All pixmaps are owned by the server gpu. 
- * When we use a different gpu, we can't use the pixmap - * as buffer since it is potentially tiled a way - * our device can't understand. In this case, use - * a fake front buffer. Hopefully the pixmap - * content will get synced with the fake front - * buffer. - */ - if (priv->is_pixmap && !psc->is_different_gpu) - front = dri3_get_pixmap_buffer(driDrawable, - format, - dri3_buffer_front, - loaderPrivate); - else - front = dri3_get_buffer(driDrawable, - format, - dri3_buffer_front, - loaderPrivate); - - if (!front) - return false; - } else { - dri3_free_buffers(driDrawable, dri3_buffer_front, loaderPrivate); - priv->have_fake_front = 0; - } - - if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) { - back = dri3_get_buffer(driDrawable, - format, - dri3_buffer_back, - loaderPrivate); - if (!back) - return false; - priv->have_back = 1; - } else { - dri3_free_buffers(driDrawable, dri3_buffer_back, loaderPrivate); - priv->have_back = 0; - } - - if (front) { - buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT; - buffers->front = front->image; - priv->have_fake_front = psc->is_different_gpu || !priv->is_pixmap; - } - - if (back) { - buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK; - buffers->back = back->image; - } - - priv->stamp = stamp; - - return true; + loader_dri3_wait_gl(draw); } /* The image loader extension record for DRI3 @@ -1496,7 +480,7 @@ dri3_get_buffers(__DRIdrawable *driDrawable, static const __DRIimageLoaderExtension imageLoaderExtension = { .base = { __DRI_IMAGE_LOADER, 1 }, - .getBuffers = dri3_get_buffers, + .getBuffers = loader_dri3_get_buffers, .flushFrontBuffer = dri3_flush_front_buffer, }; @@ -1519,172 +503,25 @@ static int64_t dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor, int64_t remainder, Bool flush) { - struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext(); struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; - struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc; - Display *dpy 
= priv->base.psc->dpy; - xcb_connection_t *c = XGetXCBConnection(dpy); - struct dri3_buffer *back; - int64_t ret = 0; - uint32_t options = XCB_PRESENT_OPTION_NONE; - unsigned flags = __DRI2_FLUSH_DRAWABLE; + if (flush) flags |= __DRI2_FLUSH_CONTEXT; - dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER); - - back = priv->buffers[DRI3_BACK_ID(priv->cur_back)]; - if (psc->is_different_gpu && back) { - /* Update the linear buffer before presenting the pixmap */ - psc->image->blitImage(pcp->driContext, - back->linear_buffer, - back->image, - 0, 0, back->width, - back->height, - 0, 0, back->width, - back->height, __BLIT_FLAG_FLUSH); - /* Update the fake front */ - if (priv->have_fake_front) - psc->image->blitImage(pcp->driContext, - priv->buffers[DRI3_FRONT_ID]->image, - back->image, - 0, 0, priv->width, - priv->height, - 0, 0, priv->width, - priv->height, __BLIT_FLAG_FLUSH); - } - - dri3_flush_present_events(priv); - - if (back && !priv->is_pixmap) { - dri3_fence_reset(c, back); - - /* Compute when we want the frame shown by taking the last known successful - * MSC and adding in a swap interval for each outstanding swap request. - * target_msc=divisor=remainder=0 means "Use glXSwapBuffers() semantic" - */ - ++priv->send_sbc; - if (target_msc == 0 && divisor == 0 && remainder == 0) - target_msc = priv->msc + priv->swap_interval * (priv->send_sbc - priv->recv_sbc); - else if (divisor == 0 && remainder > 0) { - /* From the GLX_OML_sync_control spec: - * - * "If <divisor> = 0, the swap will occur when MSC becomes - * greater than or equal to <target_msc>." - * - * Note that there's no mention of the remainder. The Present extension - * throws BadValue for remainder != 0 with divisor == 0, so just drop - * the passed in value. - */ - remainder = 0; - } - - /* From the GLX_EXT_swap_control spec: - * - * "If <interval> is set to a value of 0, buffer swaps are not - * synchronized to a video frame." 
- * - * Implementation note: It is possible to enable triple buffering behaviour - * by not using XCB_PRESENT_OPTION_ASYNC, but this should not be the default. - */ - if (priv->swap_interval == 0) - options |= XCB_PRESENT_OPTION_ASYNC; - - back->busy = 1; - back->last_swap = priv->send_sbc; - xcb_present_pixmap(c, - priv->base.xDrawable, - back->pixmap, - (uint32_t) priv->send_sbc, - 0, /* valid */ - 0, /* update */ - 0, /* x_off */ - 0, /* y_off */ - None, /* target_crtc */ - None, - back->sync_fence, - options, - target_msc, - divisor, - remainder, 0, NULL); - ret = (int64_t) priv->send_sbc; - - /* If there's a fake front, then copy the source back buffer - * to the fake front to keep it up to date. This needs - * to reset the fence and make future users block until - * the X server is done copying the bits - */ - if (priv->have_fake_front && !psc->is_different_gpu) { - dri3_fence_reset(c, priv->buffers[DRI3_FRONT_ID]); - dri3_copy_area(c, - back->pixmap, - priv->buffers[DRI3_FRONT_ID]->pixmap, - dri3_drawable_gc(priv), - 0, 0, 0, 0, priv->width, priv->height); - dri3_fence_trigger(c, priv->buffers[DRI3_FRONT_ID]); - } - xcb_flush(c); - if (priv->stamp) - ++(*priv->stamp); - } - (*psc->f->invalidate)(priv->driDrawable); - - return ret; + return loader_dri3_swap_buffers_msc(&priv->loader_drawable, + target_msc, divisor, remainder, + flags, false); } static int dri3_get_buffer_age(__GLXDRIdrawable *pdraw) { - xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy); - struct dri3_drawable *priv = (struct dri3_drawable *) pdraw; - int back_id = DRI3_BACK_ID(dri3_find_back(c, priv)); - - if (back_id < 0 || !priv->buffers[back_id]) - return 0; - - if (priv->buffers[back_id]->last_swap != 0) - return priv->send_sbc - priv->buffers[back_id]->last_swap + 1; - else - return 0; -} - -/** dri3_open - * - * Wrapper around xcb_dri3_open - */ -static int -dri3_open(Display *dpy, - Window root, - CARD32 provider) -{ - xcb_dri3_open_cookie_t cookie; - xcb_dri3_open_reply_t 
*reply; - xcb_connection_t *c = XGetXCBConnection(dpy); - int fd; - - cookie = xcb_dri3_open(c, - root, - provider); - - reply = xcb_dri3_open_reply(c, cookie, NULL); - if (!reply) - return -1; - - if (reply->nfd != 1) { - free(reply); - return -1; - } + struct dri3_drawable *priv = (struct dri3_drawable *)pdraw; - fd = xcb_dri3_open_reply_fds(c, reply)[0]; - fcntl(fd, F_SETFD, FD_CLOEXEC); - - free(reply); - - return fd; + return loader_dri3_query_buffer_age(&priv->loader_drawable); } - /** dri3_destroy_screen */ static void @@ -1727,8 +564,7 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval) break; } - priv->swap_interval = interval; - dri3_update_num_back(priv); + loader_dri3_set_swap_interval(&priv->loader_drawable, interval); return 0; } @@ -1759,14 +595,14 @@ dri3_bind_tex_image(Display * dpy, if (pdraw != NULL) { psc = (struct dri3_screen *) base->psc; - (*psc->f->invalidate)(pdraw->driDrawable); + (*psc->f->invalidate)(pdraw->loader_drawable.dri_drawable); XSync(dpy, false); (*psc->texBuffer->setTexBuffer2) (pcp->driContext, pdraw->base.textureTarget, pdraw->base.textureFormat, - pdraw->driDrawable); + pdraw->loader_drawable.dri_drawable); } } @@ -1786,7 +622,7 @@ dri3_release_tex_image(Display * dpy, GLXDrawable drawable, int buffer) psc->texBuffer->releaseTexBuffer != NULL) (*psc->texBuffer->releaseTexBuffer) (pcp->driContext, pdraw->base.textureTarget, - pdraw->driDrawable); + pdraw->loader_drawable.dri_drawable); } } @@ -1908,7 +744,7 @@ dri3_create_screen(int screen, struct glx_display * priv) return NULL; } - psc->fd = dri3_open(priv->dpy, RootWindow(priv->dpy, screen), None); + psc->fd = loader_dri3_open(c, RootWindow(priv->dpy, screen), None); if (psc->fd < 0) { int conn_error = xcb_connection_has_error(c); @@ -2000,6 +836,13 @@ dri3_create_screen(int screen, struct glx_display * priv) goto handle_error; } + psc->loader_dri3_ext.core = psc->core; + psc->loader_dri3_ext.image_driver = psc->image_driver; + psc->loader_dri3_ext.flush = 
psc->f; + psc->loader_dri3_ext.tex_buffer = psc->texBuffer; + psc->loader_dri3_ext.image = psc->image; + psc->loader_dri3_ext.config = psc->config; + configs = driConvertConfigs(psc->core, psc->base.configs, driver_configs); visuals = driConvertConfigs(psc->core, psc->base.visuals, driver_configs); diff --git a/src/glx/dri3_priv.h b/src/glx/dri3_priv.h index 160444907e6..56a63309f36 100644 --- a/src/glx/dri3_priv.h +++ b/src/glx/dri3_priv.h @@ -59,50 +59,14 @@ #include <xcb/present.h> #include <xcb/sync.h> +#include "loader_dri3_helper.h" + /* From xmlpool/options.h, user exposed so should be stable */ #define DRI_CONF_VBLANK_NEVER 0 #define DRI_CONF_VBLANK_DEF_INTERVAL_0 1 #define DRI_CONF_VBLANK_DEF_INTERVAL_1 2 #define DRI_CONF_VBLANK_ALWAYS_SYNC 3 -enum dri3_buffer_type { - dri3_buffer_back = 0, - dri3_buffer_front = 1 -}; - -struct dri3_buffer { - __DRIimage *image; - __DRIimage *linear_buffer; - uint32_t pixmap; - - /* Synchronization between the client and X server is done using an - * xshmfence that is mapped into an X server SyncFence. This lets the - * client check whether the X server is done using a buffer with a simple - * xshmfence call, rather than going to read X events from the wire. - * - * However, we can only wait for one xshmfence to be triggered at a time, - * so we need to know *which* buffer is going to be idle next. We do that - * by waiting for a PresentIdleNotify event. When that event arrives, the - * 'busy' flag gets cleared and the client knows that the fence has been - * triggered, and that the wait call will not block. 
- */ - - uint32_t sync_fence; /* XID of X SyncFence object */ - struct xshmfence *shm_fence; /* pointer to xshmfence object */ - GLboolean busy; /* Set on swap, cleared on IdleNotify */ - GLboolean own_pixmap; /* We allocated the pixmap ID, free on destroy */ - void *driverPrivate; - - uint32_t size; - uint32_t pitch; - uint32_t cpp; - uint32_t flags; - uint32_t width, height; - uint64_t last_swap; - - enum dri3_buffer_type buffer_type; -}; - struct dri3_display { __GLXDRIdisplay base; @@ -139,6 +103,8 @@ struct dri3_screen { int is_different_gpu; int show_fps_interval; + + struct loader_dri3_extensions loader_dri3_ext; }; struct dri3_context @@ -147,60 +113,10 @@ struct dri3_context __DRIcontext *driContext; }; -#define DRI3_MAX_BACK 4 -#define DRI3_BACK_ID(i) (i) -#define DRI3_FRONT_ID (DRI3_MAX_BACK) - -static inline int -dri3_pixmap_buf_id(enum dri3_buffer_type buffer_type) -{ - if (buffer_type == dri3_buffer_back) - return DRI3_BACK_ID(0); - else - return DRI3_FRONT_ID; -} - -#define DRI3_NUM_BUFFERS (1 + DRI3_MAX_BACK) - struct dri3_drawable { __GLXDRIdrawable base; - __DRIdrawable *driDrawable; - int width, height, depth; + struct loader_dri3_drawable loader_drawable; int swap_interval; - uint8_t have_back; - uint8_t have_fake_front; - uint8_t is_pixmap; - uint8_t flipping; - - /* Present extension capabilities - */ - uint32_t present_capabilities; - - /* SBC numbers are tracked by using the serial numbers - * in the present request and complete events - */ - uint64_t send_sbc; - uint64_t recv_sbc; - - /* Last received UST/MSC values for pixmap present complete */ - uint64_t ust, msc; - - /* Last received UST/MSC values from present notify msc event */ - uint64_t notify_ust, notify_msc; - - /* Serial numbers for tracking wait_for_msc events */ - uint32_t send_msc_serial; - uint32_t recv_msc_serial; - - struct dri3_buffer *buffers[DRI3_NUM_BUFFERS]; - int cur_back; - int num_back; - - uint32_t *stamp; - - xcb_present_event_t eid; - xcb_gcontext_t gc; - 
xcb_special_event_t *special_event; /* LIBGL_SHOW_FPS support */ uint64_t previous_ust; diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am index 5190f7f8a46..9ca17540d54 100644 --- a/src/loader/Makefile.am +++ b/src/loader/Makefile.am @@ -25,13 +25,16 @@ EXTRA_DIST = SConscript noinst_LTLIBRARIES = libloader.la -libloader_la_CPPFLAGS = \ +AM_CPPFLAGS = \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ $(VISIBILITY_CFLAGS) \ + $(XCB_DRI3_CFLAGS) \ + $(LIBDRM_CFLAGS) \ $(LIBUDEV_CFLAGS) +libloader_la_CPPFLAGS = $(AM_CPPFLAGS) libloader_la_SOURCES = $(LOADER_C_FILES) libloader_la_LIBADD = @@ -49,9 +52,15 @@ libloader_la_CPPFLAGS += \ endif if HAVE_LIBDRM -libloader_la_CPPFLAGS += \ - $(LIBDRM_CFLAGS) - libloader_la_LIBADD += \ $(LIBDRM_LIBS) endif + +if HAVE_DRI3 +noinst_LTLIBRARIES += libloader_dri3_helper.la + +libloader_dri3_helper_la_SOURCES = \ + loader_dri3_helper.c \ + loader_dri3_helper.h +libloader_dri3_helper_la_LIBADD = $(XCB_DRI3_LIBS) +endif diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c new file mode 100644 index 00000000000..62bfe845c08 --- /dev/null +++ b/src/loader/loader_dri3_helper.c @@ -0,0 +1,1396 @@ +/* + * Copyright © 2013 Keith Packard + * Copyright © 2015 Boyan Ding + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include <X11/xshmfence.h> +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +#include <X11/Xlib-xcb.h> + +#include "loader_dri3_helper.h" + +/* From xmlpool/options.h, user exposed so should be stable */ +#define DRI_CONF_VBLANK_NEVER 0 +#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1 +#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2 +#define DRI_CONF_VBLANK_ALWAYS_SYNC 3 + +static inline void +dri3_fence_reset(xcb_connection_t *c, struct loader_dri3_buffer *buffer) +{ + xshmfence_reset(buffer->shm_fence); +} + +static inline void +dri3_fence_set(struct loader_dri3_buffer *buffer) +{ + xshmfence_trigger(buffer->shm_fence); +} + +static inline void +dri3_fence_trigger(xcb_connection_t *c, struct loader_dri3_buffer *buffer) +{ + xcb_sync_trigger_fence(c, buffer->sync_fence); +} + +static inline void +dri3_fence_await(xcb_connection_t *c, struct loader_dri3_buffer *buffer) +{ + xcb_flush(c); + xshmfence_await(buffer->shm_fence); +} + +static void +dri3_update_num_back(struct loader_dri3_drawable *draw) +{ + draw->num_back = 1; + if (draw->flipping) { + if (!draw->is_pixmap && + !(draw->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC)) + draw->num_back++; + draw->num_back++; + } + if (draw->vtable->get_swap_interval(draw) == 0) + draw->num_back++; +} + +void +loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) +{ + interval = draw->vtable->clamp_swap_interval(draw, 
interval); + draw->vtable->set_swap_interval(draw, interval); + dri3_update_num_back(draw); +} + +/** dri3_free_render_buffer + * + * Free everything associated with one render buffer including pixmap, fence + * stuff and the driver image + */ +static void +dri3_free_render_buffer(struct loader_dri3_drawable *draw, + struct loader_dri3_buffer *buffer) +{ + if (buffer->own_pixmap) + xcb_free_pixmap(draw->conn, buffer->pixmap); + xcb_sync_destroy_fence(draw->conn, buffer->sync_fence); + xshmfence_unmap_shm(buffer->shm_fence); + (draw->ext->image->destroyImage)(buffer->image); + if (buffer->linear_buffer) + (draw->ext->image->destroyImage)(buffer->linear_buffer); + free(buffer); +} + +void +loader_dri3_drawable_fini(struct loader_dri3_drawable *draw) +{ + int i; + + (draw->ext->core->destroyDrawable)(draw->dri_drawable); + + for (i = 0; i < LOADER_DRI3_NUM_BUFFERS; i++) { + if (draw->buffers[i]) + dri3_free_render_buffer(draw, draw->buffers[i]); + } + + if (draw->special_event) + xcb_unregister_for_special_event(draw->conn, draw->special_event); +} + +int +loader_dri3_drawable_init(xcb_connection_t *conn, + xcb_drawable_t drawable, + __DRIscreen *dri_screen, + bool is_different_gpu, + const __DRIconfig *dri_config, + struct loader_dri3_extensions *ext, + struct loader_dri3_vtable *vtable, + struct loader_dri3_drawable *draw) +{ + xcb_get_geometry_cookie_t cookie; + xcb_get_geometry_reply_t *reply; + xcb_generic_error_t *error; + GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1; + int swap_interval; + + draw->conn = conn; + draw->ext = ext; + draw->vtable = vtable; + draw->drawable = drawable; + draw->dri_screen = dri_screen; + draw->is_different_gpu = is_different_gpu; + + draw->have_back = 0; + draw->have_fake_front = 0; + draw->first_init = true; + + if (draw->ext->config) + draw->ext->config->configQueryi(draw->dri_screen, + "vblank_mode", &vblank_mode); + + switch (vblank_mode) { + case DRI_CONF_VBLANK_NEVER: + case DRI_CONF_VBLANK_DEF_INTERVAL_0: + 
swap_interval = 0; + break; + case DRI_CONF_VBLANK_DEF_INTERVAL_1: + case DRI_CONF_VBLANK_ALWAYS_SYNC: + default: + swap_interval = 1; + break; + } + draw->vtable->set_swap_interval(draw, swap_interval); + + dri3_update_num_back(draw); + + /* Create a new drawable */ + draw->dri_drawable = + (draw->ext->image_driver->createNewDrawable)(dri_screen, + dri_config, + draw); + + if (!draw->dri_drawable) + return 1; + + cookie = xcb_get_geometry(draw->conn, draw->drawable); + reply = xcb_get_geometry_reply(draw->conn, cookie, &error); + if (reply == NULL || error != NULL) { + draw->ext->core->destroyDrawable(draw->dri_drawable); + return 1; + } + + draw->width = reply->width; + draw->height = reply->height; + draw->depth = reply->depth; + draw->vtable->set_drawable_size(draw, draw->width, draw->height); + free(reply); + + /* + * Make sure server has the same swap interval we do for the new + * drawable. + */ + loader_dri3_set_swap_interval(draw, swap_interval); + + return 0; +} + +/* + * Process one Present event + */ +static void +dri3_handle_present_event(struct loader_dri3_drawable *draw, + xcb_present_generic_event_t *ge) +{ + switch (ge->evtype) { + case XCB_PRESENT_CONFIGURE_NOTIFY: { + xcb_present_configure_notify_event_t *ce = (void *) ge; + + draw->width = ce->width; + draw->height = ce->height; + draw->vtable->set_drawable_size(draw, draw->width, draw->height); + break; + } + case XCB_PRESENT_COMPLETE_NOTIFY: { + xcb_present_complete_notify_event_t *ce = (void *) ge; + + /* Compute the processed SBC number from the received 32-bit serial number + * merged with the upper 32-bits of the sent 64-bit serial number while + * checking for wrap. 
+ */ + if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) { + draw->recv_sbc = (draw->send_sbc & 0xffffffff00000000LL) | ce->serial; + if (draw->recv_sbc > draw->send_sbc) + draw->recv_sbc -= 0x100000000; + switch (ce->mode) { + case XCB_PRESENT_COMPLETE_MODE_FLIP: + draw->flipping = true; + break; + case XCB_PRESENT_COMPLETE_MODE_COPY: + draw->flipping = false; + break; + } + dri3_update_num_back(draw); + + if (draw->vtable->show_fps) + draw->vtable->show_fps(draw, ce->ust); + + draw->ust = ce->ust; + draw->msc = ce->msc; + } else { + draw->recv_msc_serial = ce->serial; + draw->notify_ust = ce->ust; + draw->notify_msc = ce->msc; + } + break; + } + case XCB_PRESENT_EVENT_IDLE_NOTIFY: { + xcb_present_idle_notify_event_t *ie = (void *) ge; + int b; + + for (b = 0; b < sizeof(draw->buffers) / sizeof(draw->buffers[0]); b++) { + struct loader_dri3_buffer *buf = draw->buffers[b]; + + if (buf && buf->pixmap == ie->pixmap) { + buf->busy = 0; + if (draw->num_back <= b && b < LOADER_DRI3_MAX_BACK) { + dri3_free_render_buffer(draw, buf); + draw->buffers[b] = NULL; + } + break; + } + } + break; + } + } + free(ge); +} + +static bool +dri3_wait_for_event(struct loader_dri3_drawable *draw) +{ + xcb_generic_event_t *ev; + xcb_present_generic_event_t *ge; + + xcb_flush(draw->conn); + ev = xcb_wait_for_special_event(draw->conn, draw->special_event); + if (!ev) + return false; + ge = (void *) ev; + dri3_handle_present_event(draw, ge); + return true; +} + +/** loader_dri3_wait_for_msc + * + * Get the X server to send an event when the target msc/divisor/remainder is + * reached. 
+ */ +bool +loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw, + int64_t target_msc, + int64_t divisor, int64_t remainder, + int64_t *ust, int64_t *msc, int64_t *sbc) +{ + uint32_t msc_serial; + + msc_serial = ++draw->send_msc_serial; + xcb_present_notify_msc(draw->conn, + draw->drawable, + msc_serial, + target_msc, + divisor, + remainder); + + xcb_flush(draw->conn); + + /* Wait for the event */ + if (draw->special_event) { + while ((int32_t) (msc_serial - draw->recv_msc_serial) > 0) { + if (!dri3_wait_for_event(draw)) + return false; + } + } + + *ust = draw->notify_ust; + *msc = draw->notify_msc; + *sbc = draw->recv_sbc; + + return true; +} + +/** loader_dri3_wait_for_sbc + * + * Wait for the completed swap buffer count to reach the specified + * target. Presumably the application knows that this will be reached with + * outstanding complete events, or we're going to be here awhile. + */ +int +loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw, + int64_t target_sbc, int64_t *ust, + int64_t *msc, int64_t *sbc) +{ + /* From the GLX_OML_sync_control spec: + * + * "If <target_sbc> = 0, the function will block until all previous + * swaps requested with glXSwapBuffersMscOML for that window have + * completed." + */ + if (!target_sbc) + target_sbc = draw->send_sbc; + + while (draw->recv_sbc < target_sbc) { + if (!dri3_wait_for_event(draw)) + return 0; + } + + *ust = draw->ust; + *msc = draw->msc; + *sbc = draw->recv_sbc; + return 1; +} + +/** loader_dri3_find_back + * + * Find an idle back buffer. 
If there isn't one, then + * wait for a present idle notify event from the X server + */ +static int +dri3_find_back(struct loader_dri3_drawable *draw) +{ + int b; + xcb_generic_event_t *ev; + xcb_present_generic_event_t *ge; + + for (;;) { + for (b = 0; b < draw->num_back; b++) { + int id = LOADER_DRI3_BACK_ID((b + draw->cur_back) % draw->num_back); + struct loader_dri3_buffer *buffer = draw->buffers[id]; + + if (!buffer || !buffer->busy) { + draw->cur_back = id; + return id; + } + } + xcb_flush(draw->conn); + ev = xcb_wait_for_special_event(draw->conn, draw->special_event); + if (!ev) + return -1; + ge = (void *) ev; + dri3_handle_present_event(draw, ge); + } +} + +static xcb_gcontext_t +dri3_drawable_gc(struct loader_dri3_drawable *draw) +{ + if (!draw->gc) { + uint32_t v = 0; + xcb_create_gc(draw->conn, + (draw->gc = xcb_generate_id(draw->conn)), + draw->drawable, + XCB_GC_GRAPHICS_EXPOSURES, + &v); + } + return draw->gc; +} + + +static struct loader_dri3_buffer * +dri3_back_buffer(struct loader_dri3_drawable *draw) +{ + return draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)]; +} + +static struct loader_dri3_buffer * +dri3_fake_front_buffer(struct loader_dri3_drawable *draw) +{ + return draw->buffers[LOADER_DRI3_FRONT_ID]; +} + +static void +dri3_copy_area(xcb_connection_t *c, + xcb_drawable_t src_drawable, + xcb_drawable_t dst_drawable, + xcb_gcontext_t gc, + int16_t src_x, + int16_t src_y, + int16_t dst_x, + int16_t dst_y, + uint16_t width, + uint16_t height) +{ + xcb_void_cookie_t cookie; + + cookie = xcb_copy_area_checked(c, + src_drawable, + dst_drawable, + gc, + src_x, + src_y, + dst_x, + dst_y, + width, + height); + xcb_discard_reply(c, cookie.sequence); +} + +/** + * Asks the driver to flush any queued work necessary for serializing with the + * X command stream, and optionally the slightly more strict requirement of + * glFlush() equivalence (which would require flushing even if nothing had + * been drawn to a window system framebuffer, for example). 
+ */ +void +loader_dri3_flush(struct loader_dri3_drawable *draw, + unsigned flags, + enum __DRI2throttleReason throttle_reason) +{ + /* NEED TO CHECK WHETHER CONTEXT IS NULL */ + __DRIcontext *dri_context = draw->vtable->get_dri_context(draw); + + if (dri_context) { + draw->ext->flush->flush_with_flags(dri_context, draw->dri_drawable, + flags, throttle_reason); + } +} + +void +loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw, + int x, int y, + int width, int height, + bool flush) +{ + struct loader_dri3_buffer *back; + unsigned flags = __DRI2_FLUSH_DRAWABLE; + __DRIcontext *dri_context; + + dri_context = draw->vtable->get_dri_context(draw); + + /* Check we have the right attachments */ + if (!draw->have_back || draw->is_pixmap) + return; + + if (flush) + flags |= __DRI2_FLUSH_CONTEXT; + loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER); + + back = dri3_back_buffer(draw); + y = draw->height - y - height; + + if (draw->is_different_gpu && draw->vtable->in_current_context(draw)) { + /* Update the linear buffer part of the back buffer + * for the dri3_copy_area operation + */ + draw->ext->image->blitImage(dri_context, + back->linear_buffer, + back->image, + 0, 0, back->width, + back->height, + 0, 0, back->width, + back->height, __BLIT_FLAG_FLUSH); + /* We use blitImage to update our fake front, + */ + if (draw->have_fake_front) + draw->ext->image->blitImage(dri_context, + dri3_fake_front_buffer(draw)->image, + back->image, + x, y, width, height, + x, y, width, height, __BLIT_FLAG_FLUSH); + } + + dri3_fence_reset(draw->conn, back); + dri3_copy_area(draw->conn, + dri3_back_buffer(draw)->pixmap, + draw->drawable, + dri3_drawable_gc(draw), + x, y, x, y, width, height); + dri3_fence_trigger(draw->conn, back); + /* Refresh the fake front (if present) after we just damaged the real + * front. 
+ */ + if (draw->have_fake_front && !draw->is_different_gpu) { + dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw)); + dri3_copy_area(draw->conn, + dri3_back_buffer(draw)->pixmap, + dri3_fake_front_buffer(draw)->pixmap, + dri3_drawable_gc(draw), + x, y, x, y, width, height); + dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw)); + dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw)); + } + dri3_fence_await(draw->conn, back); +} + +void +loader_dri3_copy_drawable(struct loader_dri3_drawable *draw, + xcb_drawable_t dest, + xcb_drawable_t src) +{ + loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, 0); + + dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw)); + dri3_copy_area(draw->conn, + src, dest, + dri3_drawable_gc(draw), + 0, 0, 0, 0, draw->width, draw->height); + dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw)); + dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw)); +} + +void +loader_dri3_wait_x(struct loader_dri3_drawable *draw) +{ + struct loader_dri3_buffer *front; + __DRIcontext *dri_context; + + if (draw == NULL || !draw->have_fake_front) + return; + + front = dri3_fake_front_buffer(draw); + dri_context = draw->vtable->get_dri_context(draw); + + loader_dri3_copy_drawable(draw, front->pixmap, draw->drawable); + + /* In the psc->is_different_gpu case, the linear buffer has been updated, + * but not yet the tiled buffer. + * Copy back to the tiled buffer we use for rendering. + * Note that we don't need flushing. 
+ */ + if (draw->is_different_gpu && draw->vtable->in_current_context(draw)) + draw->ext->image->blitImage(dri_context, + front->image, + front->linear_buffer, + 0, 0, front->width, + front->height, + 0, 0, front->width, + front->height, 0); +} + +void +loader_dri3_wait_gl(struct loader_dri3_drawable *draw) +{ + struct loader_dri3_buffer *front; + __DRIcontext *dri_context; + + if (draw == NULL || !draw->have_fake_front) + return; + + front = dri3_fake_front_buffer(draw); + dri_context = draw->vtable->get_dri_context(draw); + + /* In the psc->is_different_gpu case, we update the linear_buffer + * before updating the real front. + */ + if (draw->is_different_gpu && draw->vtable->in_current_context(draw)) + draw->ext->image->blitImage(dri_context, + front->linear_buffer, + front->image, + 0, 0, front->width, + front->height, + 0, 0, front->width, + front->height, __BLIT_FLAG_FLUSH); + loader_dri3_copy_drawable(draw, draw->drawable, front->pixmap); +} + +/** dri3_flush_present_events + * + * Process any present events that have been received from the X server + */ +static void +dri3_flush_present_events(struct loader_dri3_drawable *draw) +{ + /* Check to see if any configuration changes have occurred + * since we were last invoked + */ + if (draw->special_event) { + xcb_generic_event_t *ev; + + while ((ev = xcb_poll_for_special_event(draw->conn, + draw->special_event)) != NULL) { + xcb_present_generic_event_t *ge = (void *) ev; + dri3_handle_present_event(draw, ge); + } + } +} + +/** loader_dri3_swap_buffers_msc + * + * Make the current back buffer visible using the present extension + */ +int64_t +loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw, + int64_t target_msc, int64_t divisor, + int64_t remainder, unsigned flush_flags, + bool force_copy) +{ + struct loader_dri3_buffer *back; + __DRIcontext *dri_context; + int64_t ret = 0; + uint32_t options = XCB_PRESENT_OPTION_NONE; + int swap_interval; + + dri_context = draw->vtable->get_dri_context(draw); + 
swap_interval = draw->vtable->get_swap_interval(draw); + + draw->vtable->flush_drawable(draw, flush_flags); + + back = draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)]; + if (draw->is_different_gpu && back) { + /* Update the linear buffer before presenting the pixmap */ + draw->ext->image->blitImage(dri_context, + back->linear_buffer, + back->image, + 0, 0, back->width, + back->height, + 0, 0, back->width, + back->height, __BLIT_FLAG_FLUSH); + /* Update the fake front */ + if (draw->have_fake_front) + draw->ext->image->blitImage(dri_context, + draw->buffers[LOADER_DRI3_FRONT_ID]->image, + back->image, + 0, 0, draw->width, draw->height, + 0, 0, draw->width, draw->height, + __BLIT_FLAG_FLUSH); + } + + dri3_flush_present_events(draw); + + if (back && !draw->is_pixmap) { + dri3_fence_reset(draw->conn, back); + + /* Compute when we want the frame shown by taking the last known + * successful MSC and adding in a swap interval for each outstanding swap + * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers() + * semantic" + */ + ++draw->send_sbc; + if (target_msc == 0 && divisor == 0 && remainder == 0) + target_msc = draw->msc + swap_interval * + (draw->send_sbc - draw->recv_sbc); + else if (divisor == 0 && remainder > 0) { + /* From the GLX_OML_sync_control spec: + * "If <divisor> = 0, the swap will occur when MSC becomes + * greater than or equal to <target_msc>." + * + * Note that there's no mention of the remainder. The Present + * extension throws BadValue for remainder != 0 with divisor == 0, so + * just drop the passed in value. + */ + remainder = 0; + } + + /* From the GLX_EXT_swap_control spec + * and the EGL 1.4 spec (page 53): + * + * "If <interval> is set to a value of 0, buffer swaps are not + * synchronized to a video frame." + * + * Implementation note: It is possible to enable triple buffering + * behaviour by not using XCB_PRESENT_OPTION_ASYNC, but this should not be + * the default. 
+ */ + if (swap_interval == 0) + options |= XCB_PRESENT_OPTION_ASYNC; + if (force_copy) + options |= XCB_PRESENT_OPTION_COPY; + + back->busy = 1; + back->last_swap = draw->send_sbc; + xcb_present_pixmap(draw->conn, + draw->drawable, + back->pixmap, + (uint32_t) draw->send_sbc, + 0, /* valid */ + 0, /* update */ + 0, /* x_off */ + 0, /* y_off */ + None, /* target_crtc */ + None, + back->sync_fence, + options, + target_msc, + divisor, + remainder, 0, NULL); + ret = (int64_t) draw->send_sbc; + + /* If there's a fake front, then copy the source back buffer + * to the fake front to keep it up to date. This needs + * to reset the fence and make future users block until + * the X server is done copying the bits + */ + if (draw->have_fake_front && !draw->is_different_gpu) { + dri3_fence_reset(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]); + dri3_copy_area(draw->conn, + back->pixmap, + draw->buffers[LOADER_DRI3_FRONT_ID]->pixmap, + dri3_drawable_gc(draw), + 0, 0, 0, 0, + draw->width, draw->height); + dri3_fence_trigger(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]); + } + xcb_flush(draw->conn); + if (draw->stamp) + ++(*draw->stamp); + } + + (draw->ext->flush->invalidate)(draw->dri_drawable); + + return ret; +} + +int +loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw) +{ + int back_id = LOADER_DRI3_BACK_ID(dri3_find_back(draw)); + + if (back_id < 0 || !draw->buffers[back_id]) + return 0; + + if (draw->buffers[back_id]->last_swap != 0) + return draw->send_sbc - draw->buffers[back_id]->last_swap + 1; + else + return 0; +} + +/** loader_dri3_open + * + * Wrapper around xcb_dri3_open + */ +int +loader_dri3_open(xcb_connection_t *conn, + xcb_window_t root, + uint32_t provider) +{ + xcb_dri3_open_cookie_t cookie; + xcb_dri3_open_reply_t *reply; + int fd; + + cookie = xcb_dri3_open(conn, + root, + provider); + + reply = xcb_dri3_open_reply(conn, cookie, NULL); + if (!reply) + return -1; + + if (reply->nfd != 1) { + free(reply); + return -1; + } + + fd = 
xcb_dri3_open_reply_fds(conn, reply)[0]; + /* The descriptor value has been copied into fd, so the reply object is + * no longer needed; free it here as the nfd != 1 path already does, + * otherwise it leaks on every successful open. + */ + free(reply); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + + return fd; +} + +static uint32_t +dri3_cpp_for_format(uint32_t format) { + switch (format) { + case __DRI_IMAGE_FORMAT_R8: + return 1; + case __DRI_IMAGE_FORMAT_RGB565: + case __DRI_IMAGE_FORMAT_GR88: + return 2; + case __DRI_IMAGE_FORMAT_XRGB8888: + case __DRI_IMAGE_FORMAT_ARGB8888: + case __DRI_IMAGE_FORMAT_ABGR8888: + case __DRI_IMAGE_FORMAT_XBGR8888: + case __DRI_IMAGE_FORMAT_XRGB2101010: + case __DRI_IMAGE_FORMAT_ARGB2101010: + case __DRI_IMAGE_FORMAT_SARGB8: + return 4; + case __DRI_IMAGE_FORMAT_NONE: + default: + return 0; + } +} + +/** loader_dri3_alloc_render_buffer + * + * Use the driver createImage function to construct a __DRIimage, then + * get a file descriptor for that and create an X pixmap from that + * + * Allocate an xshmfence for synchronization + */ +static struct loader_dri3_buffer * +dri3_alloc_render_buffer(struct loader_dri3_drawable *draw, unsigned int format, + int width, int height, int depth) +{ + struct loader_dri3_buffer *buffer; + __DRIimage *pixmap_buffer; + xcb_pixmap_t pixmap; + xcb_sync_fence_t sync_fence; + struct xshmfence *shm_fence; + int buffer_fd, fence_fd; + int stride; + + /* Create an xshmfence object and + * prepare to send that to the X server + */ + + fence_fd = xshmfence_alloc_shm(); + if (fence_fd < 0) + return NULL; + + shm_fence = xshmfence_map_shm(fence_fd); + if (shm_fence == NULL) + goto no_shm_fence; + + /* Allocate the image from the driver + */ + buffer = calloc(1, sizeof *buffer); + if (!buffer) + goto no_buffer; + + buffer->cpp = dri3_cpp_for_format(format); + if (!buffer->cpp) + goto no_image; + + if (!draw->is_different_gpu) { + buffer->image = (draw->ext->image->createImage)(draw->dri_screen, + width, height, + format, + __DRI_IMAGE_USE_SHARE | + __DRI_IMAGE_USE_SCANOUT, + buffer); + pixmap_buffer = buffer->image; + + if (!buffer->image) + goto no_image; + } else { + buffer->image = 
(draw->ext->image->createImage)(draw->dri_screen, + width, height, + format, + 0, + buffer); + + if (!buffer->image) + goto no_image; + + buffer->linear_buffer = + (draw->ext->image->createImage)(draw->dri_screen, + width, height, format, + __DRI_IMAGE_USE_SHARE | + __DRI_IMAGE_USE_LINEAR, + buffer); + pixmap_buffer = buffer->linear_buffer; + + if (!buffer->linear_buffer) + goto no_linear_buffer; + } + + /* X wants the stride, so ask the image for it + */ + if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE, + &stride)) + goto no_buffer_attrib; + + buffer->pitch = stride; + + if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD, + &buffer_fd)) + goto no_buffer_attrib; + + xcb_dri3_pixmap_from_buffer(draw->conn, + (pixmap = xcb_generate_id(draw->conn)), + draw->drawable, + buffer->size, + width, height, buffer->pitch, + depth, buffer->cpp * 8, + buffer_fd); + + xcb_dri3_fence_from_fd(draw->conn, + pixmap, + (sync_fence = xcb_generate_id(draw->conn)), + false, + fence_fd); + + buffer->pixmap = pixmap; + buffer->own_pixmap = true; + buffer->sync_fence = sync_fence; + buffer->shm_fence = shm_fence; + buffer->width = width; + buffer->height = height; + + /* Mark the buffer as idle + */ + dri3_fence_set(buffer); + + return buffer; + +no_buffer_attrib: + (draw->ext->image->destroyImage)(pixmap_buffer); +no_linear_buffer: + if (draw->is_different_gpu) + (draw->ext->image->destroyImage)(buffer->image); +no_image: + free(buffer); +no_buffer: + xshmfence_unmap_shm(shm_fence); +no_shm_fence: + close(fence_fd); + return NULL; +} + +/** loader_dri3_update_drawable + * + * Called the first time we use the drawable and then + * after we receive present configure notify events to + * track the geometry of the drawable + */ +static int +dri3_update_drawable(__DRIdrawable *driDrawable, + struct loader_dri3_drawable *draw) +{ + if (draw->first_init) { + xcb_get_geometry_cookie_t geom_cookie; + xcb_get_geometry_reply_t *geom_reply; + 
xcb_void_cookie_t cookie; + xcb_generic_error_t *error; + xcb_present_query_capabilities_cookie_t present_capabilities_cookie; + xcb_present_query_capabilities_reply_t *present_capabilities_reply; + + draw->first_init = false; + + /* Try to select for input on the window. + * + * If the drawable is a window, this will get our events + * delivered. + * + * Otherwise, we'll get a BadWindow error back from this request which + * will let us know that the drawable is a pixmap instead. + */ + + draw->eid = xcb_generate_id(draw->conn); + cookie = + xcb_present_select_input_checked(draw->conn, draw->eid, draw->drawable, + XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY | + XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY | + XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); + + present_capabilities_cookie = + xcb_present_query_capabilities(draw->conn, draw->drawable); + + /* Create an XCB event queue to hold present events outside of the usual + * application event queue + */ + draw->special_event = xcb_register_for_special_xge(draw->conn, + &xcb_present_id, + draw->eid, + draw->stamp); + geom_cookie = xcb_get_geometry(draw->conn, draw->drawable); + + geom_reply = xcb_get_geometry_reply(draw->conn, geom_cookie, NULL); + + if (!geom_reply) + return false; + + draw->width = geom_reply->width; + draw->height = geom_reply->height; + draw->depth = geom_reply->depth; + draw->vtable->set_drawable_size(draw, draw->width, draw->height); + + free(geom_reply); + + draw->is_pixmap = false; + + /* Check to see if our select input call failed. If it failed with a + * BadWindow error, then assume the drawable is a pixmap. 
Destroy the + * special event queue created above and mark the drawable as a pixmap + */ + + error = xcb_request_check(draw->conn, cookie); + + present_capabilities_reply = + xcb_present_query_capabilities_reply(draw->conn, + present_capabilities_cookie, + NULL); + + if (present_capabilities_reply) { + draw->present_capabilities = present_capabilities_reply->capabilities; + free(present_capabilities_reply); + } else + draw->present_capabilities = 0; + + if (error) { + if (error->error_code != BadWindow) { + free(error); + return false; + } + draw->is_pixmap = true; + xcb_unregister_for_special_event(draw->conn, draw->special_event); + draw->special_event = NULL; + } + } + dri3_flush_present_events(draw); + return true; +} + +/* the DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while + * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid + * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and + * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds + */ +static int +image_format_to_fourcc(int format) +{ + + /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */ + switch (format) { + case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888; + case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565; + case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888; + case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888; + case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888; + case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888; + } + return 0; +} + +__DRIimage * +loader_dri3_create_image(xcb_connection_t *c, + xcb_dri3_buffer_from_pixmap_reply_t *bp_reply, + unsigned int format, + __DRIscreen *dri_screen, + const __DRIimageExtension *image, + void *loaderPrivate) +{ + int *fds; + __DRIimage *image_planar, *ret; + int stride, offset; + + /* Get an FD for the pixmap object + */ + fds = 
xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply); + + stride = bp_reply->stride; + offset = 0; + + /* createImageFromFds creates a wrapper __DRIimage structure which + * can deal with multiple planes for things like Yuv images. So, once + * we've gotten the planar wrapper, pull the single plane out of it and + * discard the wrapper. + */ + image_planar = (image->createImageFromFds)(dri_screen, + bp_reply->width, + bp_reply->height, + image_format_to_fourcc(format), + fds, 1, + &stride, &offset, loaderPrivate); + close(fds[0]); + if (!image_planar) + return NULL; + + ret = (image->fromPlanar)(image_planar, 0, loaderPrivate); + + (image->destroyImage)(image_planar); + + return ret; +} + +/** dri3_get_pixmap_buffer + * + * Get the DRM object for a pixmap from the X server and + * wrap that with a __DRIimage structure using createImageFromFds + */ +static struct loader_dri3_buffer * +dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format, + enum loader_dri3_buffer_type buffer_type, + struct loader_dri3_drawable *draw) +{ + int buf_id = loader_dri3_pixmap_buf_id(buffer_type); + struct loader_dri3_buffer *buffer = draw->buffers[buf_id]; + xcb_drawable_t pixmap; + xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; + xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; + xcb_sync_fence_t sync_fence; + struct xshmfence *shm_fence; + int fence_fd; + + if (buffer) + return buffer; + + pixmap = draw->drawable; + + buffer = calloc(1, sizeof *buffer); + if (!buffer) + goto no_buffer; + + fence_fd = xshmfence_alloc_shm(); + if (fence_fd < 0) + goto no_fence; + shm_fence = xshmfence_map_shm(fence_fd); + if (shm_fence == NULL) { + close (fence_fd); + goto no_fence; + } + + xcb_dri3_fence_from_fd(draw->conn, + pixmap, + (sync_fence = xcb_generate_id(draw->conn)), + false, + fence_fd); + + bp_cookie = xcb_dri3_buffer_from_pixmap(draw->conn, pixmap); + bp_reply = xcb_dri3_buffer_from_pixmap_reply(draw->conn, bp_cookie, NULL); + if (!bp_reply) + goto no_image; + + 
buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format, + draw->dri_screen, draw->ext->image, + buffer); + if (!buffer->image) + goto no_image; + + buffer->pixmap = pixmap; + buffer->own_pixmap = false; + buffer->width = bp_reply->width; + buffer->height = bp_reply->height; + buffer->buffer_type = buffer_type; + buffer->shm_fence = shm_fence; + buffer->sync_fence = sync_fence; + + draw->buffers[buf_id] = buffer; + + free(bp_reply); + + return buffer; + +no_image: + free(bp_reply); + xcb_sync_destroy_fence(draw->conn, sync_fence); + xshmfence_unmap_shm(shm_fence); +no_fence: + free(buffer); +no_buffer: + return NULL; +} + +/** dri3_get_buffer + * + * Find a front or back buffer, allocating new ones as necessary + */ +static struct loader_dri3_buffer * +dri3_get_buffer(__DRIdrawable *driDrawable, + unsigned int format, + enum loader_dri3_buffer_type buffer_type, + struct loader_dri3_drawable *draw) +{ + struct loader_dri3_buffer *buffer; + int buf_id; + __DRIcontext *dri_context; + + dri_context = draw->vtable->get_dri_context(draw); + + if (buffer_type == loader_dri3_buffer_back) { + buf_id = dri3_find_back(draw); + + if (buf_id < 0) + return NULL; + } else { + buf_id = LOADER_DRI3_FRONT_ID; + } + + buffer = draw->buffers[buf_id]; + + /* Allocate a new buffer if there isn't an old one, or if that + * old one is the wrong size + */ + if (!buffer || buffer->width != draw->width || + buffer->height != draw->height) { + struct loader_dri3_buffer *new_buffer; + + /* Allocate the new buffers + */ + new_buffer = dri3_alloc_render_buffer(draw, + format, + draw->width, + draw->height, + draw->depth); + if (!new_buffer) + return NULL; + + /* When resizing, copy the contents of the old buffer, waiting for that + * copy to complete using our fences before proceeding + */ + switch (buffer_type) { + case loader_dri3_buffer_back: + if (buffer) { + if (!buffer->linear_buffer) { + dri3_fence_reset(draw->conn, new_buffer); + dri3_fence_await(draw->conn, buffer); + 
dri3_copy_area(draw->conn, + buffer->pixmap, + new_buffer->pixmap, + dri3_drawable_gc(draw), + 0, 0, 0, 0, + draw->width, draw->height); + dri3_fence_trigger(draw->conn, new_buffer); + } else if (draw->vtable->in_current_context(draw)) { + draw->ext->image->blitImage(dri_context, + new_buffer->image, + buffer->image, + 0, 0, draw->width, draw->height, + 0, 0, draw->width, draw->height, 0); + } + dri3_free_render_buffer(draw, buffer); + } + break; + case loader_dri3_buffer_front: + dri3_fence_reset(draw->conn, new_buffer); + dri3_copy_area(draw->conn, + draw->drawable, + new_buffer->pixmap, + dri3_drawable_gc(draw), + 0, 0, 0, 0, + draw->width, draw->height); + dri3_fence_trigger(draw->conn, new_buffer); + + if (new_buffer->linear_buffer && + draw->vtable->in_current_context(draw)) { + dri3_fence_await(draw->conn, new_buffer); + draw->ext->image->blitImage(dri_context, + new_buffer->image, + new_buffer->linear_buffer, + 0, 0, draw->width, draw->height, + 0, 0, draw->width, draw->height, 0); + } + break; + } + buffer = new_buffer; + buffer->buffer_type = buffer_type; + draw->buffers[buf_id] = buffer; + } + dri3_fence_await(draw->conn, buffer); + + /* Return the requested buffer */ + return buffer; +} + +/** dri3_free_buffers + * + * Free the front bufffer or all of the back buffers. 
Used + * when the application changes which buffers it needs + */ +static void +dri3_free_buffers(__DRIdrawable *driDrawable, + enum loader_dri3_buffer_type buffer_type, + struct loader_dri3_drawable *draw) +{ + struct loader_dri3_buffer *buffer; + int first_id; + int n_id; + int buf_id; + + switch (buffer_type) { + case loader_dri3_buffer_back: + first_id = LOADER_DRI3_BACK_ID(0); + n_id = LOADER_DRI3_MAX_BACK; + break; + case loader_dri3_buffer_front: + first_id = LOADER_DRI3_FRONT_ID; + n_id = 1; + } + + for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) { + buffer = draw->buffers[buf_id]; + if (buffer) { + dri3_free_render_buffer(draw, buffer); + draw->buffers[buf_id] = NULL; + } + } +} + +/** loader_dri3_get_buffers + * + * The published buffer allocation API. + * Returns all of the necessary buffers, allocating + * as needed. + */ +int +loader_dri3_get_buffers(__DRIdrawable *driDrawable, + unsigned int format, + uint32_t *stamp, + void *loaderPrivate, + uint32_t buffer_mask, + struct __DRIimageList *buffers) +{ + struct loader_dri3_drawable *draw = loaderPrivate; + struct loader_dri3_buffer *front, *back; + + buffers->image_mask = 0; + buffers->front = NULL; + buffers->back = NULL; + + front = NULL; + back = NULL; + + if (!dri3_update_drawable(driDrawable, draw)) + return false; + + /* pixmaps always have front buffers */ + if (draw->is_pixmap) + buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; + + if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) { + /* All pixmaps are owned by the server gpu. + * When we use a different gpu, we can't use the pixmap + * as buffer since it is potentially tiled a way + * our device can't understand. In this case, use + * a fake front buffer. Hopefully the pixmap + * content will get synced with the fake front + * buffer. 
+ */ + if (draw->is_pixmap && !draw->is_different_gpu) + front = dri3_get_pixmap_buffer(driDrawable, + format, + loader_dri3_buffer_front, + draw); + else + front = dri3_get_buffer(driDrawable, + format, + loader_dri3_buffer_front, + draw); + + if (!front) + return false; + } else { + dri3_free_buffers(driDrawable, loader_dri3_buffer_front, draw); + draw->have_fake_front = 0; + } + + if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) { + back = dri3_get_buffer(driDrawable, + format, + loader_dri3_buffer_back, + draw); + if (!back) + return false; + draw->have_back = 1; + } else { + dri3_free_buffers(driDrawable, loader_dri3_buffer_back, draw); + draw->have_back = 0; + } + + if (front) { + buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT; + buffers->front = front->image; + draw->have_fake_front = draw->is_different_gpu || !draw->is_pixmap; + } + + if (back) { + buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK; + buffers->back = back->image; + } + + draw->stamp = stamp; + + return true; +} diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h new file mode 100644 index 00000000000..5b8fd1d24ca --- /dev/null +++ b/src/loader/loader_dri3_helper.h @@ -0,0 +1,241 @@ +/* + * Copyright © 2013 Keith Packard + * Copyright © 2015 Boyan Ding + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#ifndef LOADER_DRI3_HEADER_H +#define LOADER_DRI3_HEADER_H + +#include <stdbool.h> +#include <stdint.h> + +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +#include <GL/gl.h> +#include <GL/internal/dri_interface.h> + +enum loader_dri3_buffer_type { + loader_dri3_buffer_back = 0, + loader_dri3_buffer_front = 1 +}; + +struct loader_dri3_buffer { + __DRIimage *image; + __DRIimage *linear_buffer; + uint32_t pixmap; + + /* Synchronization between the client and X server is done using an + * xshmfence that is mapped into an X server SyncFence. This lets the + * client check whether the X server is done using a buffer with a simple + * xshmfence call, rather than going to read X events from the wire. + * + * However, we can only wait for one xshmfence to be triggered at a time, + * so we need to know *which* buffer is going to be idle next. We do that + * by waiting for a PresentIdleNotify event. When that event arrives, the + * 'busy' flag gets cleared and the client knows that the fence has been + * triggered, and that the wait call will not block. 
+ */ + + uint32_t sync_fence; /* XID of X SyncFence object */ + struct xshmfence *shm_fence; /* pointer to xshmfence object */ + bool busy; /* Set on swap, cleared on IdleNotify */ + bool own_pixmap; /* We allocated the pixmap ID, free on destroy */ + + uint32_t size; + uint32_t pitch; + uint32_t cpp; + uint32_t flags; + uint32_t width, height; + uint64_t last_swap; + + enum loader_dri3_buffer_type buffer_type; +}; + + +#define LOADER_DRI3_MAX_BACK 4 +#define LOADER_DRI3_BACK_ID(i) (i) +#define LOADER_DRI3_FRONT_ID (LOADER_DRI3_MAX_BACK) + +static inline int +loader_dri3_pixmap_buf_id(enum loader_dri3_buffer_type buffer_type) +{ + if (buffer_type == loader_dri3_buffer_back) + return LOADER_DRI3_BACK_ID(0); + else + return LOADER_DRI3_FRONT_ID; +} + +struct loader_dri3_extensions { + const __DRIcoreExtension *core; + const __DRIimageDriverExtension *image_driver; + const __DRI2flushExtension *flush; + const __DRI2configQueryExtension *config; + const __DRItexBufferExtension *tex_buffer; + const __DRIimageExtension *image; +}; + +struct loader_dri3_drawable; + +struct loader_dri3_vtable { + int (*get_swap_interval)(struct loader_dri3_drawable *); + int (*clamp_swap_interval)(struct loader_dri3_drawable *, int); + void (*set_swap_interval)(struct loader_dri3_drawable *, int); + void (*set_drawable_size)(struct loader_dri3_drawable *, int, int); + bool (*in_current_context)(struct loader_dri3_drawable *); + __DRIcontext *(*get_dri_context)(struct loader_dri3_drawable *); + void (*flush_drawable)(struct loader_dri3_drawable *, unsigned); + void (*show_fps)(struct loader_dri3_drawable *, uint64_t); +}; + +#define LOADER_DRI3_NUM_BUFFERS (1 + LOADER_DRI3_MAX_BACK) + +struct loader_dri3_drawable { + xcb_connection_t *conn; + __DRIdrawable *dri_drawable; + xcb_drawable_t drawable; + int width; + int height; + int depth; + uint8_t have_back; + uint8_t have_fake_front; + uint8_t is_pixmap; + uint8_t flipping; + + /* Information about the GPU owning the buffer */ + __DRIscreen 
*dri_screen; + bool is_different_gpu; + + /* Present extension capabilities + */ + uint32_t present_capabilities; + + /* SBC numbers are tracked by using the serial numbers + * in the present request and complete events + */ + uint64_t send_sbc; + uint64_t recv_sbc; + + /* Last received UST/MSC values for pixmap present complete */ + uint64_t ust, msc; + + /* Last received UST/MSC values from present notify msc event */ + uint64_t notify_ust, notify_msc; + + /* Serial numbers for tracking wait_for_msc events */ + uint32_t send_msc_serial; + uint32_t recv_msc_serial; + + struct loader_dri3_buffer *buffers[LOADER_DRI3_NUM_BUFFERS]; + int cur_back; + int num_back; + + uint32_t *stamp; + + xcb_present_event_t eid; + xcb_gcontext_t gc; + xcb_special_event_t *special_event; + + bool first_init; + + struct loader_dri3_extensions *ext; + struct loader_dri3_vtable *vtable; +}; + +void +loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, + int interval); + +void +loader_dri3_drawable_fini(struct loader_dri3_drawable *draw); + +int +loader_dri3_drawable_init(xcb_connection_t *conn, + xcb_drawable_t drawable, + __DRIscreen *dri_screen, + bool is_different_gpu, + const __DRIconfig *dri_config, + struct loader_dri3_extensions *ext, + struct loader_dri3_vtable *vtable, + struct loader_dri3_drawable*); + +bool loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw, + int64_t target_msc, + int64_t divisor, int64_t remainder, + int64_t *ust, int64_t *msc, int64_t *sbc); + +int64_t +loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw, + int64_t target_msc, int64_t divisor, + int64_t remainder, unsigned flush_flags, + bool force_copy); + +int +loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw, + int64_t target_sbc, int64_t *ust, + int64_t *msc, int64_t *sbc); + +int loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw); + +void +loader_dri3_flush(struct loader_dri3_drawable *draw, + unsigned flags, + enum __DRI2throttleReason 
throttle_reason); + +void +loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw, + int x, int y, + int width, int height, + bool flush); + +void +loader_dri3_copy_drawable(struct loader_dri3_drawable *draw, + xcb_drawable_t dest, + xcb_drawable_t src); + +void +loader_dri3_wait_x(struct loader_dri3_drawable *draw); + +void +loader_dri3_wait_gl(struct loader_dri3_drawable *draw); + +int loader_dri3_open(xcb_connection_t *conn, + xcb_window_t root, + uint32_t provider); + +__DRIimage * +loader_dri3_create_image(xcb_connection_t *c, + xcb_dri3_buffer_from_pixmap_reply_t *bp_reply, + unsigned int format, + __DRIscreen *dri_screen, + const __DRIimageExtension *image, + void *loaderPrivate); + +int +loader_dri3_get_buffers(__DRIdrawable *driDrawable, + unsigned int format, + uint32_t *stamp, + void *loaderPrivate, + uint32_t buffer_mask, + struct __DRIimageList *buffers); + +#endif diff --git a/src/mapi/glapi/gen/EXT_gpu_shader4.xml b/src/mapi/glapi/gen/EXT_gpu_shader4.xml index b1f7eae2610..b4120b9c192 100644 --- a/src/mapi/glapi/gen/EXT_gpu_shader4.xml +++ b/src/mapi/glapi/gen/EXT_gpu_shader4.xml @@ -232,7 +232,8 @@ <param name="params" type="GLuint *"/> </function> - <function name="BindFragDataLocationEXT" alias="BindFragDataLocation"> + <function name="BindFragDataLocationEXT" alias="BindFragDataLocation" + es2="3.0"> <param name="program" type="GLuint"/> <param name="colorNumber" type="GLuint"/> <param name="name" type="const GLchar *"/> diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 9a777a24c61..577d8254c43 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -914,4 +914,30 @@ </function> </category> +<category name="GL_EXT_blend_func_extended" number="247"> + + <function name="BindFragDataLocationIndexedEXT" alias="BindFragDataLocationIndexed" + es2="3.0"> + <param name="program" type="GLuint"/> + <param name="colorNumber" type="GLuint"/> + <param name="index" type="GLuint"/> + <param 
name="name" type="const GLchar *"/> + </function> + + <function name="GetFragDataIndexEXT" alias="GetFragDataIndex" + es2="3.0"> + <param name="program" type="GLuint"/> + <param name="name" type="const GLchar *"/> + <return type="GLint"/> + </function> + + <function name="GetProgramResourceLocationIndexEXT" alias="GetProgramResourceLocationIndex" + es2="3.1"> + <param name="program" type="GLuint"/> + <param name="programInterface" type="GLenum"/> + <param name="name" type="const GLchar *"/> + <return type="GLint"/> + </function> + +</category> </OpenGLAPI> diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index a9da0a21ba3..bde170fcf6f 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -102,13 +102,13 @@ fallback_required(struct gl_context *ctx, GLenum target, */ if (!mipmap->FBO) _mesa_GenFramebuffers(1, &mipmap->FBO); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, mipmap->FBO); - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0); + _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0); - status = _mesa_CheckFramebufferStatus(GL_FRAMEBUFFER_EXT); + status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, fboSave); + _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fboSave); if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, @@ -131,6 +131,11 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap) _mesa_DeleteSamplers(1, &mipmap->Sampler); mipmap->Sampler = 0; + if (mipmap->FBO != 0) { + _mesa_DeleteFramebuffers(1, &mipmap->FBO); + mipmap->FBO = 0; + } + _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders); } diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 
b8ab480ddfe..a8f7c9b854b 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -59,6 +59,9 @@ extern char *program_invocation_name, *program_invocation_short_name; #elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100) # include <stdlib.h> # define GET_PROGRAM_NAME() getprogname() +#elif defined(__DragonFly__) +# include <stdlib.h> +# define GET_PROGRAM_NAME() getprogname() #elif defined(__APPLE__) # include <stdlib.h> # define GET_PROGRAM_NAME() getprogname() diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 7fa4ce87f18..b8990cef89e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), - fs_reg(0u), fs_reg(0u)); + brw_imm_ud(0u), brw_imm_ud(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 40ad14402a7..73ba85e2a61 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -224,7 +224,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, vec1(t_nopersp), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); - brw_MOV(p, t_nopersp, brw_imm_vf4(1, 0, 0, 0)); + brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0))); brw_ENDIF(p); /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index cd78af0dce4..e49994f19a8 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -23,6 +23,7 @@ #pragma once +#include <stdio.h> #include "brw_device_info.h" #include "main/mtypes.h" @@ -89,8 +90,7 @@ struct brw_compiler { void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); - bool scalar_vs; - bool scalar_gs; + bool scalar_stage[MESA_SHADER_STAGES]; struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; @@ -493,6 +493,34 @@ enum shader_dispatch_mode { DISPATCH_MODE_SIMD8 = 3, }; +/** + * @defgroup Tessellator parameter enumerations. + * + * These correspond to the hardware values in 3DSTATE_TE, and are provided + * as part of the tessellation evaluation shader. + * + * @{ + */ +enum brw_tess_partitioning { + BRW_TESS_PARTITIONING_INTEGER = 0, + BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1, + BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2, +}; + +enum brw_tess_output_topology { + BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0, + BRW_TESS_OUTPUT_TOPOLOGY_LINE = 1, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3, +}; + +enum brw_tess_domain { + BRW_TESS_DOMAIN_QUAD = 0, + BRW_TESS_DOMAIN_TRI = 1, + BRW_TESS_DOMAIN_ISOLINE = 2, +}; +/** @} */ + struct brw_vue_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ac6045dbba9..2ea0a9eca92 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -322,64 +322,82 @@ static void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct brw_compiler *compiler = brw->intelScreen->compiler; + + const bool stage_exists[MESA_SHADER_STAGES] 
= { + [MESA_SHADER_VERTEX] = true, + [MESA_SHADER_TESS_CTRL] = false, + [MESA_SHADER_TESS_EVAL] = false, + [MESA_SHADER_GEOMETRY] = brw->gen >= 6, + [MESA_SHADER_FRAGMENT] = true, + [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader, + }; + + unsigned num_stages = 0; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + if (stage_exists[i]) + num_stages++; + } unsigned max_samplers = brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16; + ctx->Const.MaxDualSourceDrawBuffers = 1; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; + ctx->Const.MaxCombinedShaderOutputResources = + MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; + ctx->Const.QueryCounterBits.Timestamp = 36; + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; + ctx->Const.MaxRenderbufferSize = 8192; + ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); + ctx->Const.Max3DTextureLevels = 12; /* 2048 */ + ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ + ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512; + ctx->Const.MaxTextureMbytes = 1536; + ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.StripTextureBorder = true; + if (brw->gen >= 7) + ctx->Const.MaxProgramTextureGatherComponents = 4; + else if (brw->gen == 6) + ctx->Const.MaxProgramTextureGatherComponents = 1; ctx->Const.MaxUniformBlockSize = 65536; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program_constants *prog = &ctx->Const.Program[i]; + + if (!stage_exists[i]) + continue; + + prog->MaxTextureImageUnits = max_samplers; + prog->MaxUniformBlocks = BRW_MAX_UBO; prog->MaxCombinedUniformComponents = prog->MaxUniformComponents + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; + + prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + prog->MaxAtomicBuffers = BRW_MAX_ABO; + prog->MaxImageUniforms = compiler->scalar_stage[i] ? 
BRW_MAX_IMAGES : 0; + prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; } - ctx->Const.MaxDualSourceDrawBuffers = 1; - ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers; - if (brw->gen >= 6) - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers; - else - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; - if (_mesa_extension_override_enables.ARB_compute_shader) { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO; - } else { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0; - } - ctx->Const.MaxCombinedTextureImageUnits = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - - ctx->Const.MaxTextureLevels = 14; /* 8192 */ - if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS) - ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; - ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ - ctx->Const.MaxTextureMbytes = 1536; - - if (brw->gen >= 7) - ctx->Const.MaxArrayTextureLayers = 2048; - else - ctx->Const.MaxArrayTextureLayers = 512; - ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; + ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; + 
ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; + ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; + ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - ctx->Const.MaxRenderbufferSize = 8192; /* Hardware only supports a limited number of transform feedback buffers. * So we need to override the Mesa default (which is based only on software @@ -427,6 +445,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxColorTextureSamples = max_samples; ctx->Const.MaxDepthTextureSamples = max_samples; ctx->Const.MaxIntegerSamples = max_samples; + ctx->Const.MaxImageSamples = 0; /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used * to map indices of rectangular grid to sample numbers within a pixel. @@ -436,11 +455,6 @@ brw_initialize_context_constants(struct brw_context *brw) */ gen6_set_sample_maps(ctx); - if (brw->gen >= 7) - ctx->Const.MaxProgramTextureGatherComponents = 4; - else if (brw->gen == 6) - ctx->Const.MaxProgramTextureGatherComponents = 1; - ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; if (brw->gen >= 6) { @@ -511,30 +525,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - if (brw->gen >= 7) { - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO; - 
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms = - (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0); - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - ctx->Const.MaxCombinedShaderOutputResources = - MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; - ctx->Const.MaxImageSamples = 0; - ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; - } - /* Gen6 converts quads to polygon in beginning of 3D pipeline, * but we're not sure how it's actually done for vertex order, * that affect provoking vertex decision. Always use last vertex @@ -586,21 +576,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.TextureBufferOffsetAlignment = 16; ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - /* FIXME: Tessellation stages are not yet supported in i965, so - * MaxCombinedShaderStorageBlocks doesn't take them into account. 
- */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3; - ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3; - - if (_mesa_extension_override_enables.ARB_compute_shader) - ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO; - if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4b2db61c758..fe45edb89ff 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -523,6 +523,8 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, + ST_TCS, + ST_TES, ST_GS, ST_FS8, ST_FS16, @@ -1465,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw, /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); +bool brw_losslessly_compressible_format(struct brw_context *brw, + uint32_t brw_format); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, mesa_format format); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3ad90da8b2f..36d9f716e03 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1291,6 +1291,16 @@ enum opcode { * Calculate the high 32-bits of a 32x32 multiply. 
*/ SHADER_OPCODE_MULH, + + /** + * A MOV that uses VxH indirect addressing. + * + * Source 0: A register to start from (HW_REG). + * Source 1: An indirect offset (in bytes, UD GRF). + * Source 2: The length of the region that could be accessed (in bytes, + * UD immediate). + */ + SHADER_OPCODE_MOV_INDIRECT, }; enum brw_urb_write_flags { @@ -1930,8 +1940,14 @@ enum brw_message_target { /* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size * is 2^9, or 512. It's counted in multiples of 64 bytes. + * + * Identical for VS, DS, and HS. */ #define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64) +#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64) + /* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit * (128 bytes) URB rows and the maximum allowed value is 5 rows. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 250d4097e38..419168966de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -43,6 +43,7 @@ #include "brw_wm.h" #include "brw_fs.h" #include "brw_cs.h" +#include "brw_nir.h" #include "brw_vec4_gs_visitor.h" #include "brw_cfg.h" #include "brw_dead_control_flow.h" @@ -186,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@ -374,54 +375,6 @@ fs_reg::fs_reg() this->file = BAD_FILE; } -/** Immediate value constructor. */ -fs_reg::fs_reg(float f) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_F; - this->stride = 0; - this->f = f; -} - -/** Immediate value constructor. 
*/ -fs_reg::fs_reg(int32_t i) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_D; - this->stride = 0; - this->d = i; -} - -/** Immediate value constructor. */ -fs_reg::fs_reg(uint32_t u) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_UD; - this->stride = 0; - this->ud = u; -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf[4]) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->ud, vf, sizeof(unsigned)); -} - -/** Vector float immediate value constructor. */ -fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) -{ - init(); - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); -} - fs_reg::fs_reg(struct brw_reg reg) : backend_reg(reg) { @@ -591,7 +544,7 @@ fs_visitor::emit_shader_time_end() fs_reg reset = shader_end_time; reset.set_smear(2); set_condmod(BRW_CONDITIONAL_Z, - ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u))); + ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u))); ibld.IF(BRW_PREDICATE_NORMAL); fs_reg start = shader_start_time; @@ -606,11 +559,11 @@ fs_visitor::emit_shader_time_end() * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. 
*/ - cbld.ADD(diff, diff, fs_reg(-2u)); + cbld.ADD(diff, diff, brw_imm_ud(-2u)); SHADER_TIME_ADD(cbld, 0, diff); - SHADER_TIME_ADD(cbld, 1, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(cbld, 2, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u)); ibld.emit(BRW_OPCODE_ENDIF); } @@ -620,7 +573,7 @@ fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, fs_reg value) { int index = shader_time_index * 3 + shader_time_subindex; - fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); + struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) @@ -841,6 +794,34 @@ fs_inst::regs_read(int arg) const case SHADER_OPCODE_BARRIER: return 1; + case SHADER_OPCODE_MOV_INDIRECT: + if (arg == 0) { + assert(src[2].file == IMM); + unsigned region_length = src[2].ud; + + if (src[0].file == FIXED_GRF) { + /* If the start of the region is not register aligned, then + * there's some portion of the register that's technically + * unread at the beginning. + * + * However, the register allocator works in terms of whole + * registers, and does not use subnr. It assumes that the + * read starts at the beginning of the register, and extends + * regs_read() whole registers beyond that. + * + * To compensate, we extend the region length to include this + * unread portion at the beginning. 
+ */ + if (src[0].subnr) + region_length += src[0].subnr * type_sz(src[0].type); + + return DIV_ROUND_UP(region_length, REG_SIZE); + } else { + assert(!"Invalid register file"); + } + } + break; + default: if (is_tex() && arg == 0 && src[0].file == VGRF) return mlen; @@ -1005,7 +986,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, if (pixel_center_integer) { bld.MOV(wpos, this->pixel_x); } else { - bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); + bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f)); } wpos = offset(wpos, bld, 1); @@ -1021,7 +1002,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, offset += key->drawable_height - 1.0f; } - bld.ADD(wpos, pixel_y, fs_reg(offset)); + bld.ADD(wpos, pixel_y, brw_imm_f(offset)); } wpos = offset(wpos, bld, 1); @@ -1198,7 +1179,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W)); g0.negate = true; - bld.ASR(*reg, g0, fs_reg(15)); + bld.ASR(*reg, g0, brw_imm_d(15)); } else { /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create * a boolean result from this (1/true or 0/false). @@ -1213,7 +1194,7 @@ fs_visitor::emit_frontfacing_interpolation() fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - bld.ASR(*reg, g1_6, fs_reg(31)); + bld.ASR(*reg, g1_6, brw_imm_d(31)); } return reg; @@ -1230,7 +1211,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) /* Convert int_sample_pos to floating point */ bld.MOV(dst, int_sample_pos); /* Scale to the range [0, 1] */ - bld.MUL(dst, dst, fs_reg(1 / 16.0f)); + bld.MUL(dst, dst, brw_imm_f(1 / 16.0f)); } else { /* From ARB_sample_shading specification: @@ -1238,7 +1219,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) * rasterization is disabled, gl_SamplePosition will always be * (0.5, 0.5). 
*/ - bld.MOV(dst, fs_reg(0.5f)); + bld.MOV(dst, brw_imm_f(0.5f)); } } @@ -1333,8 +1314,8 @@ fs_visitor::emit_sampleid_setup() abld.exec_all().group(1, 0) .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(sspi_mask)); - abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); + brw_imm_ud(sspi_mask)); + abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5)); /* This works for both SIMD8 and SIMD16 */ abld.exec_all().group(4, 0) @@ -1349,7 +1330,7 @@ fs_visitor::emit_sampleid_setup() * "When rendering to a non-multisample buffer, or if multisample * rasterization is disabled, gl_SampleID will always be zero." */ - abld.MOV(*reg, fs_reg(0)); + abld.MOV(*reg, brw_imm_d(0)); } return reg; @@ -1662,24 +1643,7 @@ fs_visitor::assign_gs_urb_setup() first_non_payload_grf += 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in; - const unsigned first_icp_handle = payload.num_regs - - (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0); - foreach_block_and_inst(block, fs_inst, inst, cfg) { - /* Lower URB_READ_SIMD8 opcodes into real messages. */ - if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) { - assert(inst->src[0].file == IMM); - inst->src[0] = retype(brw_vec8_grf(first_icp_handle + - inst->src[0].ud, - 0), BRW_REGISTER_TYPE_UD); - /* for now, assume constant - we can do per-slot offsets later */ - assert(inst->src[1].file == IMM); - inst->offset = inst->src[1].ud; - inst->src[1] = fs_reg(); - inst->mlen = 1; - inst->base_mrf = -1; - } - /* Rewrite all ATTR file references to GRFs. */ convert_attr_sources_to_hw_regs(inst); } @@ -2037,16 +2001,16 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. 
*/ if (inst->src[i].reladdr) { VARYING_PULL_CONSTANT_LOAD(ibld, dst, - fs_reg(index), + brw_imm_ud(index), *inst->src[i].reladdr, pull_index); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { const fs_builder ubld = ibld.exec_all().group(8, 0); - fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); + struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, fs_reg(index), offset); + dst, brw_imm_ud(index), offset); inst->src[i].set_smear(pull_index & 3); } brw_mark_surface_used(prog_data, index); @@ -2738,7 +2702,7 @@ fs_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = fs_reg(0u); + inst->src[0] = brw_imm_ud(0u); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -3591,6 +3555,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, assert(devinfo->gen >= 9); assert(bld.dispatch_width() != 16); + /* XXX: src_stencil is only available on gen9+. dst_depth is never + * available on gen9+. As such it's impossible to have both enabled at the + * same time and therefore length cannot overrun the array. 
+ */ + assert(length < 15); + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.exec_all().annotate("FB write OS") .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], @@ -3660,7 +3630,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, (has_lod || shadow_c.file != BAD_FILE || (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) { for (unsigned i = coord_components; i < 3; i++) - bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f)); + bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 3 - coord_components); } @@ -3717,7 +3687,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - bld.MOV(msg_end, fs_reg(0.0f)); + bld.MOV(msg_end, brw_imm_f(0.0f)); msg_end = offset(msg_end, bld, 1); } @@ -3813,7 +3783,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, case SHADER_OPCODE_TXF_CMS: msg_lod = offset(msg_coords, bld, 3); /* lod */ - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); /* sample index */ bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index); msg_end = offset(msg_lod, bld, 2); @@ -3896,7 +3866,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, if (bld.shader->stage != MESA_SHADER_FRAGMENT && op == SHADER_OPCODE_TEX) { op = SHADER_OPCODE_TXL; - lod = fs_reg(0.0f); + lod = brw_imm_f(0.0f); } /* Set up the LOD info */ @@ -4110,7 +4080,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask) { fs_builder ubld = bld.exec_all().group(8, 0); const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(dst, fs_reg(0)); + ubld.MOV(dst, brw_imm_d(0)); ubld.MOV(component(dst, 7), sample_mask); return dst; } @@ -4252,7 +4222,7 @@ fs_visitor::lower_logical_sends() case 
SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_TYPED_SURFACE_READ, - fs_reg(0xffff)); + brw_imm_d(0xffff)); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: @@ -4677,6 +4647,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case IMM: unreachable("not reached"); } + if (inst->dst.stride != 1) + fprintf(file, "<%u>", inst->dst.stride); fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); for (int i = 0; i < inst->sources; i++) { @@ -4764,6 +4736,16 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "|"); if (inst->src[i].file != IMM) { + unsigned stride; + if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { + unsigned hstride = inst->src[i].hstride; + stride = (hstride == 0 ? 0 : (1 << (hstride - 1))); + } else { + stride = inst->src[i].stride; + } + if (stride != 1) + fprintf(file, "<%u>", stride); + fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type)); } @@ -5241,7 +5223,7 @@ fs_visitor::run_gs() */ if (gs_compile->control_data_header_size_bits <= 32) { const fs_builder abld = bld.annotate("initialize control data bits"); - abld.MOV(this->control_data_bits, fs_reg(0u)); + abld.MOV(this->control_data_bits, brw_imm_ud(0u)); } } @@ -5474,13 +5456,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, bool use_rep_send, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + /* key->alpha_test_func means simulating alpha testing via discards, * so the shader definitely kills pixels. 
*/ @@ -5633,11 +5620,16 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_cs_prog_key *key, struct brw_cs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + true); + shader = brw_postprocess_nir(shader, compiler->devinfo, true); + prog_data->local_size[0] = shader->info.cs.local_size[0]; prog_data->local_size[1] = shader->info.cs.local_size[1]; prog_data->local_size[2] = shader->info.cs.local_size[2]; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9b56afd292f..658608f9951 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -116,10 +116,6 @@ public: void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); void compute_clip_distance(gl_clip_plane *clip_planes); - uint32_t gather_channel(int orig_chan, uint32_t surface, uint32_t sampler); - void swizzle_result(ir_texture_opcode op, int dest_components, - fs_reg orig_val, uint32_t sampler); - fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, const fs_reg ®); @@ -218,8 +214,6 @@ public: void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); - fs_reg rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler); void emit_texture(ir_texture_opcode op, const glsl_type *dest_type, fs_reg coordinate, int components, @@ -230,7 +224,6 @@ public: fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, uint32_t surface, fs_reg surface_reg, uint32_t sampler, @@ -305,7 +298,8 @@ public: unsigned stream_id); void emit_gs_thread_end(); void emit_gs_input_load(const fs_reg &dst, const nir_src 
&vertex_src, - unsigned offset, unsigned num_components); + const fs_reg &indirect_offset, unsigned imm_offset, + unsigned num_components); void emit_cs_terminate(); fs_reg *emit_cs_local_invocation_id_setup(); fs_reg *emit_cs_work_group_id_setup(); @@ -530,6 +524,11 @@ private: struct brw_reg offset, struct brw_reg value); + void generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 22b2f22073f..dd3c383a17d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -225,7 +225,7 @@ namespace brw { sample_mask_reg() const { if (shader->stage != MESA_SHADER_FRAGMENT) { - return src_reg(0xffff); + return brw_imm_d(0xffff); } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) { return brw_flag_reg(0, 1); } else { @@ -548,7 +548,7 @@ namespace brw { const dst_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); - ADD(one_minus_a, negate(a), src_reg(1.0f)); + ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index 8fdc959f992..7c01f1e3d62 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -90,7 +90,8 @@ opt_cmod_propagation_local(bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->overwrites_reg(inst->src[0])) { if (scan_inst->is_partial_write() || - scan_inst->dst.reg_offset != inst->src[0].reg_offset) + scan_inst->dst.reg_offset != inst->src[0].reg_offset || + scan_inst->exec_size 
!= inst->exec_size) break; /* CMP's result is the same regardless of dest type. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index 0c115f50748..c3ad7ad4771 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -279,7 +279,7 @@ fs_visitor::opt_combine_constants() imm->block->last_non_control_flow_inst()->next); const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0); - ibld.MOV(reg, fs_reg(imm->val)); + ibld.MOV(reg, brw_imm_f(imm->val)); imm->nr = reg.nr; imm->subreg_offset = reg.subreg_offset; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 426ea57d8f9..62ae9abede7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode) opcode == BRW_OPCODE_NOT); } +static bool +can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, + const brw_device_info *devinfo) +{ + if (stride > 4) + return false; + + /* 3-source instructions can only be Align16, which restricts what strides + * they can take. They can only take a stride of 1 (the usual case), or 0 + * with a special "repctrl" bit. But the repctrl bit doesn't work for + * 64-bit datatypes, so if the source type is 64-bit then only a stride of + * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page + * 944: + * + * This is applicable to 32b datatypes and 16b datatype. 64b datatypes + * cannot use the replicate control. 
+ */ + if (inst->is_3src()) { + if (type_sz(inst->src[arg].type) > 4) + return stride == 1; + else + return stride == 1 || stride == 0; + } + + /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions", + * page 391 ("Extended Math Function"): + * + * The following restrictions apply for align1 mode: Scalar source is + * supported. Source and destination horizontal stride must be the + * same. + * + * From the Haswell PRM Volume 2b "Command Reference - Instructions", page + * 134 ("Extended Math Function"): + * + * Scalar source is supported. Source and destination horizontal stride + * must be 1. + * + * and similar language exists for IVB and SNB. Pre-SNB, math instructions + * are sends, so the sources are moved to MRF's and there are no + * restrictions. + */ + if (inst->is_math()) { + if (devinfo->gen == 6 || devinfo->gen == 7) { + assert(inst->dst.stride == 1); + return stride == 1 || stride == 0; + } else if (devinfo->gen >= 8) { + return stride == inst->dst.stride || stride == 0; + } + } + + return true; +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) /* Bail if the result of composing both strides would exceed the * hardware limit. 
*/ - if (entry->src.stride * inst->src[arg].stride > 4) + if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride, + devinfo)) return false; /* Bail if the instruction type is larger than the execution type of the diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 8c67caff6e0..3b65a382dc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -209,6 +210,8 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { copy = bld.MOV(inst->dst, src); + copy->force_sechalf = inst->force_sechalf; + copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } assert(copy->regs_written == written); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 9d7fb94c397..8528f391941 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -89,39 +89,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) brw_reg.abs = reg->abs; brw_reg.negate = reg->negate; break; - case IMM: - assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V || - reg->type == BRW_REGISTER_TYPE_UV || - reg->type == BRW_REGISTER_TYPE_VF) ? 
1 : 0)); - - switch (reg->type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(reg->f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(reg->d); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(reg->ud); - break; - case BRW_REGISTER_TYPE_W: - brw_reg = brw_imm_w(reg->d); - break; - case BRW_REGISTER_TYPE_UW: - brw_reg = brw_imm_uw(reg->ud); - break; - case BRW_REGISTER_TYPE_VF: - brw_reg = brw_imm_vf(reg->ud); - break; - case BRW_REGISTER_TYPE_V: - brw_reg = brw_imm_v(reg->ud); - break; - default: - unreachable("not reached"); - } - break; case ARF: case FIXED_GRF: + case IMM: brw_reg = *static_cast<struct brw_reg *>(reg); break; case BAD_FILE: @@ -372,6 +342,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset) +{ + assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; + + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re re-type to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + /* Prior to Broadwell, there are only 8 address registers. 
*/ + assert(inst->exec_size == 8 || devinfo->gen >= 8); + + brw_MOV(p, addr, indirect_byte_offset); + brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE); + brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); +} + +void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg header) @@ -700,6 +700,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; } + /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type + * is set as part of the message descriptor. On gen4, the PRM seems to + * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on + * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is + * gone from the message descriptor entirely and you just get UINT32 all + * the time regasrdless. Since we can really only do non-UINT32 on gen4, + * just stomp it to UINT32 all the time. + */ + if (inst->opcode == SHADER_OPCODE_TXS) + return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; + switch (inst->exec_size) { case 8: simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; @@ -2087,6 +2098,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count++; break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(inst, dst, src[0], src[1]); + break; + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: generate_urb_read(inst, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3a666b8debc..6b0c4a5b36e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -208,7 +208,7 @@ emit_system_values_block(nir_block *block, void *void_visitor) const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL); fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.SHR(iid, g1, fs_reg(27u)); + abld.SHR(iid, g1, 
brw_imm_ud(27u)); *reg = iid; } break; @@ -250,6 +250,57 @@ emit_system_values_block(nir_block *block, void *void_visitor) *reg = *v->emit_cs_work_group_id_setup(); break; + case nir_intrinsic_load_helper_invocation: + assert(v->stage == MESA_SHADER_FRAGMENT); + reg = &v->nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION]; + if (reg->file == BAD_FILE) { + const fs_builder abld = + v->bld.annotate("gl_HelperInvocation", NULL); + + /* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the + * pixel mask is in g1.7 of the thread payload. + * + * We move the per-channel pixel enable bit to the low bit of each + * channel by shifting the byte containing the pixel mask by the + * vector immediate 0x76543210UV. + * + * The region of <1,8,0> reads only 1 byte (the pixel masks for + * subspans 0 and 1) in SIMD8 and an additional byte (the pixel + * masks for 2 and 3) in SIMD16. + */ + fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1); + abld.SHR(shifted, + stride(byte_offset(retype(brw_vec1_grf(1, 0), + BRW_REGISTER_TYPE_UB), 28), + 1, 8, 0), + brw_imm_uv(0x76543210)); + + /* A set bit in the pixel mask means the channel is enabled, but + * that is the opposite of gl_HelperInvocation so we need to invert + * the mask. + * + * The negate source-modifier bit of logical instructions on Gen8+ + * performs 1's complement negation, so we can use that instead of + * a NOT instruction. + */ + fs_reg inverted = negate(shifted); + if (v->devinfo->gen < 8) { + inverted = abld.vgrf(BRW_REGISTER_TYPE_UW); + abld.NOT(inverted, shifted); + } + + /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing + * with 1 and negating. 
+ */ + fs_reg anded = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); + abld.AND(anded, inverted, brw_imm_uw(1)); + + fs_reg dst = abld.vgrf(BRW_REGISTER_TYPE_D, 1); + abld.MOV(dst, negate(retype(anded, BRW_REGISTER_TYPE_D))); + *reg = dst; + } + break; + default: break; } @@ -454,8 +505,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, tmp.subreg_offset = 2; tmp.stride = 2; - fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80)); - or_inst->src[1].type = BRW_REGISTER_TYPE_UW; + bld.OR(tmp, g0, brw_imm_uw(0x3f80)); tmp.type = BRW_REGISTER_TYPE_D; tmp.subreg_offset = 0; @@ -479,9 +529,9 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, g1_6.negate = true; } - bld.OR(tmp, g1_6, fs_reg(0x3f800000)); + bld.OR(tmp, g1_6, brw_imm_d(0x3f800000)); } - bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)); + bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000)); return true; } @@ -594,14 +644,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. */ - bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD); op[0].type = BRW_REGISTER_TYPE_UD; result.type = BRW_REGISTER_TYPE_UD; - bld.AND(result_int, op[0], fs_reg(0x80000000u)); + bld.AND(result_int, op[0], brw_imm_ud(0x80000000u)); - inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u)); + inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u)); inst->predicate = BRW_PREDICATE_NORMAL; if (instr->dest.saturate) { inst = bld.MOV(result, result); @@ -615,9 +665,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. 
*/ - bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G); - bld.ASR(result, op[0], fs_reg(31)); - inst = bld.OR(result, result, fs_reg(1)); + bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G); + bld.ASR(result, op[0], brw_imm_d(31)); + inst = bld.OR(result, result, brw_imm_d(1)); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -665,21 +715,21 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_fddy: if (fs_key->high_quality_derivatives) { inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); } else { inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); } inst->saturate = instr->dest.saturate; break; case nir_op_fddy_fine: inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; case nir_op_fddy_coarse: inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - fs_reg(fs_key->render_to_fbo)); + brw_imm_d(fs_key->render_to_fbo)); inst->saturate = instr->dest.saturate; break; @@ -828,10 +878,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_f2b: - bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ); + bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ); break; case nir_op_i2b: - bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); break; case nir_op_ftrunc: @@ -931,9 +981,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. 
*/ - bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); - inst = bld.ADD(result, result, fs_reg(31)); + inst = bld.ADD(result, result, brw_imm_d(31)); inst->predicate = BRW_PREDICATE_NORMAL; inst->src[0].negate = true; break; @@ -986,7 +1036,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) if (optimize_frontfacing_ternary(instr, result)) return; - bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); inst = bld.SEL(result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1001,7 +1051,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) if (devinfo->gen <= 5 && (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { fs_reg masked = vgrf(glsl_type::int_type); - bld.AND(masked, result, fs_reg(1)); + bld.AND(masked, result, brw_imm_d(1)); masked.negate = true; bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked); } @@ -1014,7 +1064,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld, fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components); for (unsigned i = 0; i < instr->def.num_components; i++) - bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i])); + bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i])); nir_ssa_values[instr->def.index] = reg; } @@ -1042,7 +1092,7 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type)); v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect), - fs_reg(multiplier)); + brw_imm_d(multiplier)); } return reg; @@ -1108,12 +1158,12 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref) */ bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect), BRW_REGISTER_TYPE_UD), - fs_reg(size - base - 1), BRW_CONDITIONAL_L); + brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L); } else { 
bld.MOV(tmp, get_nir_src(deref_array->indirect)); } - bld.MUL(tmp, tmp, fs_reg(element_size)); + bld.MUL(tmp, tmp, brw_imm_ud(element_size)); if (image.reladdr) bld.ADD(*image.reladdr, *image.reladdr, tmp); else @@ -1232,7 +1282,7 @@ intexp2(const fs_builder &bld, const fs_reg &x) fs_reg result = bld.vgrf(x.type, 1); fs_reg one = bld.vgrf(x.type, 1); - bld.MOV(one, retype(fs_reg(1), one.type)); + bld.MOV(one, retype(brw_imm_d(1), one.type)); bld.SHL(result, one, x); return result; } @@ -1285,7 +1335,7 @@ fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src) /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu)); + abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); fs_reg mask = intexp2(abld, prev_count); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -1356,26 +1406,26 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) { fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu)); + abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); unsigned log2_bits_per_vertex = _mesa_fls(gs_compile->control_data_bits_per_vertex); - abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex)); + abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex)); if (per_slot_offset.file != BAD_FILE) { /* Set the per-slot offset to dword_index / 4, so that we'll write to * the appropriate OWord within the control data header. 
*/ - abld.SHR(per_slot_offset, dword_index, fs_reg(2u)); + abld.SHR(per_slot_offset, dword_index, brw_imm_ud(2u)); } /* Set the channel masks to 1 << (dword_index % 4), so that we'll * write to the appropriate DWORD within the OWORD. */ fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - fwa_bld.AND(channel, dword_index, fs_reg(3u)); + fwa_bld.AND(channel, dword_index, brw_imm_ud(3u)); channel_mask = intexp2(fwa_bld, channel); /* Then the channel masks need to be in bits 23:16. */ - fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u)); + fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u)); } /* Store the control data bits in the message payload and send it. */ @@ -1435,11 +1485,11 @@ fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count, /* reg::sid = stream_id */ fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.MOV(sid, fs_reg(stream_id)); + abld.MOV(sid, brw_imm_ud(stream_id)); /* reg:shift_count = 2 * (vertex_count - 1) */ fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - abld.SHL(shift_count, vertex_count, fs_reg(1u)); + abld.SHL(shift_count, vertex_count, brw_imm_ud(1u)); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -1510,14 +1560,14 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, */ fs_inst *inst = abld.AND(bld.null_reg_d(), vertex_count, - fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u)); + brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u)); inst->conditional_mod = BRW_CONDITIONAL_Z; abld.IF(BRW_PREDICATE_NORMAL); /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we can skip emitting them. 
*/ - abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u), + abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ); abld.IF(BRW_PREDICATE_NORMAL); emit_gs_control_data_bits(vertex_count); @@ -1530,7 +1580,7 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ - inst = abld.MOV(this->control_data_bits, fs_reg(0u)); + inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u)); inst->force_writemask_all = true; abld.emit(BRW_OPCODE_ENDIF); } @@ -1551,42 +1601,113 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, void fs_visitor::emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - unsigned input_offset, + const fs_reg &indirect_offset, + unsigned imm_offset, unsigned num_components) { - const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data; - const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0]; + struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data; - const unsigned array_stride = vue_prog_data->urb_read_length * 8; + /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y], + * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only + * gl_PointSize is available as a GS input, however, so it must be that. + */ + const bool is_point_size = + indirect_offset.file == BAD_FILE && imm_offset == 0; + + nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); + const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; + + if (indirect_offset.file == BAD_FILE && vertex_const != NULL && + 4 * imm_offset < push_reg_count) { + imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count; + /* This input was pushed into registers. 
*/ + if (is_point_size) { + /* gl_PointSize comes in .w */ + bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type)); + } else { + for (unsigned i = 0; i < num_components; i++) { + bld.MOV(offset(dst, bld, i), + fs_reg(ATTR, imm_offset + i, dst.type)); + } + } + } else { + /* Resort to the pull model. Ensure the VUE handles are provided. */ + gs_prog_data->base.include_vue_handles = true; - const bool pushed = 4 * input_offset < array_stride; + unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2; + fs_reg icp_handle; - if (input_offset == 0) { - /* This is the VUE header, containing VARYING_SLOT_LAYER [.y], - * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. - * Only gl_PointSize is available as a GS input, so they must - * be asking for that input. - */ - if (pushed) { - bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type)); + if (vertex_const) { + /* The vertex index is constant; just select the proper URB handle. */ + icp_handle = + retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0), + BRW_REGISTER_TYPE_UD); } else { - fs_reg tmp = bld.vgrf(dst.type, 4); - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, - fs_reg(vertex), fs_reg(0)); - inst->regs_written = 4; - bld.MOV(dst, offset(tmp, bld, 3)); + /* The vertex index is non-constant. We need to use indirect + * addressing to fetch the proper URB handle. + * + * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0> + * indicating that channel <n> should read the handle from + * DWord <n>. We convert that to bytes by multiplying by 4. + * + * Next, we convert the vertex index to bytes by multiplying + * by 32 (shifting by 5), and add the two together. This is + * the final indirect byte offset. 
+ */ + fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_W, 1); + fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + + /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */ + bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210))); + /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ + bld.SHL(channel_offsets, sequence, brw_imm_ud(2u)); + /* Convert vertex_index to bytes (multiply by 32) */ + bld.SHL(vertex_offset_bytes, + retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), + brw_imm_ud(5u)); + bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets); + + /* Use first_icp_handle as the base offset. There is one register + * of URB handles per vertex, so inform the register allocator that + * we might read up to nir->info.gs.vertices_in registers. + */ + bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, + fs_reg(brw_vec8_grf(first_icp_handle, 0)), + fs_reg(icp_offset_bytes), + brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE)); } - } else { - if (pushed) { - int index = vertex * array_stride + 4 * input_offset; - for (unsigned i = 0; i < num_components; i++) { - bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type)); - } + + fs_inst *inst; + if (indirect_offset.file == BAD_FILE) { + /* Constant indexing - use global offset. */ + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->mlen = 1; + inst->regs_written = num_components; } else { - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, - fs_reg(vertex), fs_reg(input_offset)); + /* Indirect indexing - use per-slot offsets as well. 
*/ + const fs_reg srcs[] = { icp_handle, indirect_offset }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->mlen = 2; inst->regs_written = num_components; } + + if (is_point_size) { + /* Read the whole VUE header (because of alignment) and read .w. */ + fs_reg tmp = bld.vgrf(dst.type, 4); + inst->dst = tmp; + inst->regs_written = 4; + bld.MOV(dst, offset(tmp, bld, 3)); + } } } @@ -1626,6 +1747,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { assert(stage == MESA_SHADER_GEOMETRY); + fs_reg indirect_offset; fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -1644,9 +1766,11 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, unreachable("load_input intrinsics are invalid for the GS stage"); case nir_intrinsic_load_per_vertex_input_indirect: - assert(!"Not allowed"); + indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D); + /* fallthrough */ case nir_intrinsic_load_per_vertex_input: - emit_gs_input_load(dest, instr->src[0], instr->const_index[0], + emit_gs_input_load(dest, instr->src[0], + indirect_offset, instr->const_index[0], instr->num_components); break; @@ -1703,6 +1827,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, break; } + case nir_intrinsic_load_helper_invocation: case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_sample_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); @@ -1723,7 +1848,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, fs_inst *cmp; if (instr->intrinsic == nir_intrinsic_discard_if) { cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), - fs_reg(0), BRW_CONDITIONAL_Z); + brw_imm_d(0), BRW_CONDITIONAL_Z); } else { fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 
@@ -1771,7 +1896,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, fs_reg(), /* src */ - fs_reg(0u), + brw_imm_ud(0u), interpolation); break; @@ -1785,7 +1910,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, fs_reg(), /* src */ - fs_reg(msg_data), + brw_imm_ud(msg_data), interpolation); } else { const fs_reg sample_src = retype(get_nir_src(instr->src[0]), @@ -1794,7 +1919,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, if (nir_src_is_dynamically_uniform(instr->src[0])) { const fs_reg sample_id = bld.emit_uniformize(sample_src); const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, @@ -1820,7 +1946,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, sample_src, sample_id, BRW_CONDITIONAL_EQ); const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); fs_inst *inst = emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, @@ -1851,7 +1978,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, fs_reg(), /* src */ - fs_reg(off_x | (off_y << 4)), + brw_imm_ud(off_x | (off_y << 4)), interpolation); } else { fs_reg src = vgrf(glsl_type::ivec2_type); @@ -1859,7 +1986,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f)); + bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); bld.MOV(itemp, temp); /* float to int */ @@ -1879,7 
+2006,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); + bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); } const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; @@ -1887,7 +2014,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, opcode, dst_xy, src, - fs_reg(0u), + brw_imm_ud(0u), interpolation); } break; @@ -1947,14 +2074,14 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, cs_prog_data->uses_num_work_groups = true; - fs_reg surf_index = fs_reg(surface); + fs_reg surf_index = brw_imm_ud(surface); brw_mark_surface_used(prog_data, surface); /* Read the 3 GLuint components of gl_NumWorkGroups */ for (unsigned i = 0; i < 3; i++) { fs_reg read_result = emit_untyped_read(bld, surf_index, - fs_reg(i << 2), + brw_imm_ud(i << 2), 1 /* dims */, 1 /* size */, BRW_PREDICATE_NONE); read_result.type = dest.type; @@ -1994,16 +2121,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Emit a surface read or atomic op. 
*/ switch (instr->intrinsic) { case nir_intrinsic_atomic_counter_read: - tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1); + tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1); break; case nir_intrinsic_atomic_counter_inc: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(), fs_reg(), 1, 1, BRW_AOP_INC); break; case nir_intrinsic_atomic_counter_dec: - tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(), fs_reg(), 1, 1, BRW_AOP_PREDEC); break; @@ -2145,14 +2272,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr for (unsigned c = 0; c < info->dest_components; ++c) { if ((int)c >= type->coordinate_components()) { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), - fs_reg(1)); + brw_imm_d(1)); } else if (c == 1 && is_1d_array_image) { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), offset(size, bld, 2)); } else if (c == 2 && is_cube_array_image) { bld.emit(SHADER_OPCODE_INT_QUOTIENT, offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), - offset(size, bld, c), fs_reg(6)); + offset(size, bld, c), brw_imm_d(6)); } else { bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c), offset(size, bld, c)); @@ -2164,7 +2291,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_image_samples: /* The driver does not support multi-sampled images. 
*/ - bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1)); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; case nir_intrinsic_load_uniform_indirect: @@ -2195,7 +2322,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_index) { const unsigned index = stage_prog_data->binding_table.ubo_start + const_index->u[0]; - surf_index = fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { /* The block index is not a constant. Evaluate the index expression @@ -2204,7 +2331,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + brw_imm_ud(stage_prog_data->binding_table.ubo_start)); surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide @@ -2220,7 +2347,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg base_offset = vgrf(glsl_type::int_type); bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D), - fs_reg(2)); + brw_imm_d(2)); unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) @@ -2230,7 +2357,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); + struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); @@ -2262,12 +2389,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = 
fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -2282,7 +2409,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (has_indirect) { offset_reg = get_nir_src(instr->src[1]); } else { - offset_reg = fs_reg(instr->const_index[0]); + offset_reg = brw_imm_ud(instr->const_index[0]); } /* Read the vector */ @@ -2333,12 +2460,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = fs_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[1]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ssbo_start + @@ -2362,12 +2489,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg offset_reg; if (!has_indirect) { - offset_reg = fs_reg(instr->const_index[0] + 4 * first_component); + offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), - fs_reg(4 * first_component)); + brw_imm_ud(4 * first_component)); } emit_untyped_write(bld, surf_index, offset_reg, @@ -2438,7 +2565,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr int reg_width = 
dispatch_width / 8; /* Set LOD = 0 */ - fs_reg source = fs_reg(0); + fs_reg source = brw_imm_d(0); int mlen = 1 * reg_width; @@ -2457,7 +2584,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_UD); const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index; fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size, - src_payload, fs_reg(index)); + src_payload, brw_imm_ud(index)); inst->header_size = 0; inst->mlen = mlen; inst->regs_written = regs_written; @@ -2486,12 +2613,12 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, if (const_surface) { unsigned surf_index = stage_prog_data->binding_table.ssbo_start + const_surface->u[0]; - surface = fs_reg(surf_index); + surface = brw_imm_ud(surf_index); brw_mark_surface_used(prog_data, surf_index); } else { surface = vgrf(glsl_type::uint_type); bld.ADD(surface, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ssbo_start)); + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); /* Assume this may touch any SSBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. 
@@ -2524,13 +2651,11 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - fs_reg texture_reg(texture); - fs_reg sampler_reg(sampler); + fs_reg texture_reg(brw_imm_ud(texture)); + fs_reg sampler_reg(brw_imm_ud(sampler)); int gather_component = instr->component; - bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT; - bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array; @@ -2552,6 +2677,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: + case nir_texop_samples_identical: coordinate = retype(src, BRW_REGISTER_TYPE_D); break; default: @@ -2604,7 +2730,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ texture_reg = vgrf(glsl_type::uint_type); - bld.ADD(texture_reg, src, fs_reg(texture)); + bld.ADD(texture_reg, src, brw_imm_ud(texture)); texture_reg = bld.emit_uniformize(texture_reg); break; } @@ -2612,7 +2738,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); - bld.ADD(sampler_reg, src, fs_reg(sampler)); + bld.ADD(sampler_reg, src, brw_imm_ud(sampler)); sampler_reg = bld.emit_uniformize(sampler_reg); break; } @@ -2622,19 +2748,20 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) } } - if (instr->op == nir_texop_txf_ms) { + if (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical) { if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << texture)) { mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { - mcs = fs_reg(0u); + mcs = brw_imm_ud(0u); } } for (unsigned i = 0; i < 3; i++) { if 
(instr->const_offset[i] != 0) { assert(offset_components == 0); - tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3)); + tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3)); break; } } @@ -2668,6 +2795,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) inst->base_mrf = -1; return; } + case nir_texop_samples_identical: op = ir_samples_identical; break; default: unreachable("unknown texture opcode"); } @@ -2675,8 +2803,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, is_rect, - texture, texture_reg, sampler, sampler_reg); + is_cube_array, texture, texture_reg, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 534d8490cdf..45694ec0894 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -44,7 +44,7 @@ namespace brw { */ const fs_reg usurface = bld.emit_uniformize(surface); const fs_reg srcs[] = { - addr, src, usurface, fs_reg(dims), fs_reg(arg) + addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg) }; const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize); fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); @@ -330,7 +330,7 @@ namespace { * messages causes a hang on IVB and VLV. 
*/ set_predicate(pred, - bld.CMP(bld.null_reg_ud(), stride, fs_reg(4), + bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4), BRW_CONDITIONAL_G)); return BRW_PREDICATE_NORMAL; @@ -361,7 +361,7 @@ namespace { */ bld.CMP(bld.null_reg_ud(), retype(size, BRW_REGISTER_TYPE_UD), - fs_reg(0), BRW_CONDITIONAL_NZ); + brw_imm_d(0), BRW_CONDITIONAL_NZ); return BRW_PREDICATE_NORMAL; } else { @@ -438,7 +438,7 @@ namespace { * FINISHME: Factor out this frequently recurring pattern into a * helper function. */ - const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) }; + const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) }; const fs_reg dst = bld.vgrf(addr.type, dims); bld.LOAD_PAYLOAD(dst, srcs, dims, 0); return dst; @@ -488,7 +488,7 @@ namespace { bld.ADD(offset(addr, bld, c), offset(off, bld, c), (c < dims ? offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) : - fs_reg(0))); + fs_reg(brw_imm_d(0)))); /* The layout of 3-D textures in memory is sort-of like a tiling * format. At each miplevel, the slices are arranged in rows of @@ -515,7 +515,7 @@ namespace { /* Decompose z into a major (tmp.y) and a minor (tmp.x) * index. */ - bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0), + bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0), offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2)); bld.SHR(offset(tmp, bld, 1), offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2), @@ -549,7 +549,7 @@ namespace { for (unsigned c = 0; c < 2; ++c) { /* Calculate the minor x and y indices. */ bld.BFE(offset(minor, bld, c), offset(tile, bld, c), - fs_reg(0), offset(addr, bld, c)); + brw_imm_d(0), offset(addr, bld, c)); /* Calculate the major x and y indices. */ bld.SHR(offset(major, bld, c), @@ -595,7 +595,7 @@ namespace { /* XOR tmp.x and tmp.y with bit 6 of the memory address. 
*/ bld.XOR(tmp, tmp, offset(tmp, bld, 1)); - bld.AND(tmp, tmp, fs_reg(1 << 6)); + bld.AND(tmp, tmp, brw_imm_d(1 << 6)); bld.XOR(dst, dst, tmp); } @@ -647,7 +647,7 @@ namespace { const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); /* Shift each component left to the correct bitfield position. */ - bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32)); + bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32)); /* Add everything up. */ if (seen[shifts[c] / 32]) { @@ -679,13 +679,13 @@ namespace { /* Shift left to discard the most significant bits. */ bld.SHL(offset(dst, bld, c), offset(src, bld, shifts[c] / 32), - fs_reg(32 - shifts[c] % 32 - widths[c])); + brw_imm_ud(32 - shifts[c] % 32 - widths[c])); /* Shift back to the least significant bits using an arithmetic * shift to get sign extension on signed types. */ bld.ASR(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(32 - widths[c])); + offset(dst, bld, c), brw_imm_ud(32 - widths[c])); } } @@ -709,13 +709,13 @@ namespace { if (widths[c]) { /* Clamp to the maximum value. */ bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c), - fs_reg((int)scale(widths[c] - s)), + brw_imm_d((int)scale(widths[c] - s)), BRW_CONDITIONAL_L); /* Clamp to the minimum value. */ if (is_signed) bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(-(int)scale(widths[c] - s) - 1), + brw_imm_d(-(int)scale(widths[c] - s) - 1), BRW_CONDITIONAL_GE); } } @@ -741,12 +741,12 @@ namespace { /* Divide by the normalization constants. */ bld.MUL(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(1.0f / scale(widths[c] - s))); + brw_imm_f(1.0f / scale(widths[c] - s))); /* Clamp to the minimum value. */ if (is_signed) bld.emit_minmax(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(-1.0f), + offset(dst, bld, c), brw_imm_f(-1.0f), BRW_CONDITIONAL_GE); } } @@ -771,10 +771,10 @@ namespace { /* Clamp the normalized floating-point argument. 
*/ if (is_signed) { bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c), - fs_reg(-1.0f), BRW_CONDITIONAL_GE); + brw_imm_f(-1.0f), BRW_CONDITIONAL_GE); bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg(1.0f), BRW_CONDITIONAL_L); + brw_imm_f(1.0f), BRW_CONDITIONAL_L); } else { set_saturate(true, bld.MOV(offset(fdst, bld, c), offset(src, bld, c))); @@ -782,7 +782,7 @@ namespace { /* Multiply by the normalization constants. */ bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg((float)scale(widths[c] - s))); + brw_imm_f((float)scale(widths[c] - s))); /* Convert to integer. */ bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); @@ -814,7 +814,7 @@ namespace { */ if (widths[c] < 16) bld.SHL(offset(dst, bld, c), - offset(dst, bld, c), fs_reg(15 - widths[c])); + offset(dst, bld, c), brw_imm_ud(15 - widths[c])); /* Convert to 32-bit floating point. */ bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c)); @@ -842,7 +842,7 @@ namespace { /* Clamp to the minimum value. */ if (widths[c] < 16) bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), - fs_reg(0.0f), BRW_CONDITIONAL_GE); + brw_imm_f(0.0f), BRW_CONDITIONAL_GE); /* Convert to 16-bit floating-point. */ bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c)); @@ -855,7 +855,7 @@ namespace { */ if (widths[c] < 16) bld.SHR(offset(dst, bld, c), offset(dst, bld, c), - fs_reg(15 - widths[c])); + brw_imm_ud(15 - widths[c])); } } @@ -874,7 +874,8 @@ namespace { for (unsigned c = 0; c < 4; ++c) bld.MOV(offset(dst, bld, c), - widths[c] ? offset(src, bld, c) : fs_reg(pad[c])); + widths[c] ? offset(src, bld, c) + : fs_reg(brw_imm_ud(pad[c]))); return dst; } @@ -939,7 +940,7 @@ namespace brw { /* An out of bounds surface access should give zero as result. 
*/ for (unsigned c = 0; c < size; ++c) set_predicate(pred, bld.SEL(offset(tmp, bld, c), - offset(tmp, bld, c), fs_reg(0))); + offset(tmp, bld, c), brw_imm_d(0))); } /* Set the register type to D instead of UD if the data type is @@ -1122,7 +1123,7 @@ namespace brw { /* An unbound surface access should give zero as result. */ if (rsize) - set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0))); + set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0))); return tmp; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2647a40c730..e82acd141f3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -79,122 +79,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -fs_reg -fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler) -{ - bool needs_gl_clamp = true; - fs_reg scale_x, scale_y; - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (is_rect && - (devinfo->gen < 6 || - (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) || - key_tex->gl_clamp_mask[1] & (1 << sampler))))) { - struct gl_program_parameter_list *params = prog->Parameters; - - - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale - * parameters (pre-gen6, or gen6+ with GL_CLAMP). - */ - int tokens[STATE_LENGTH] = { - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - prog->SamplerUnits[sampler], - 0, - 0 - }; - - no16("rectangle scale uniform setup not supported on SIMD16\n"); - if (dispatch_width == 16) { - return coordinate; - } - - GLuint index = _mesa_add_state_reference(params, - (gl_state_index *)tokens); - /* Try to find existing copies of the texrect scale uniforms. 
*/ - for (unsigned i = 0; i < uniforms; i++) { - if (stage_prog_data->param[i] == - &prog->Parameters->ParameterValues[index][0]) { - scale_x = fs_reg(UNIFORM, i); - scale_y = fs_reg(UNIFORM, i + 1); - break; - } - } - - /* If we didn't already set them up, do so now. */ - if (scale_x.file == BAD_FILE) { - scale_x = fs_reg(UNIFORM, uniforms); - scale_y = fs_reg(UNIFORM, uniforms + 1); - - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][0]; - stage_prog_data->param[uniforms++] = - &prog->Parameters->ParameterValues[index][1]; - } - } - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (devinfo->gen < 6 && is_rect) { - fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components)); - fs_reg src = coordinate; - coordinate = dst; - - bld.MUL(dst, src, scale_x); - dst = offset(dst, bld, 1); - src = offset(src, bld, 1); - bld.MUL(dst, src, scale_y); - } else if (is_rect) { - /* On gen6+, the sampler handles the rectangle coordinates - * natively, without needing rescaling. But that means we have - * to do GL_CLAMP clamping at the [0, width], [0, height] scale, - * not [0, 1] like the default case below. - */ - needs_gl_clamp = false; - - for (int i = 0; i < 2; i++) { - if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, bld, i); - - set_condmod(BRW_CONDITIONAL_GE, - bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f))); - - /* Our parameter comes in as 1.0/width or 1.0/height, - * because that's what people normally want for doing - * texture rectangle handling. We need width or height - * for clamping, but we don't care enough to make a new - * parameter type, so just invert back. - */ - fs_reg limit = vgrf(glsl_type::float_type); - bld.MOV(limit, i == 0 ? 
scale_x : scale_y); - bld.emit(SHADER_OPCODE_RCP, limit, limit); - - set_condmod(BRW_CONDITIONAL_L, - bld.emit(BRW_OPCODE_SEL, chan, chan, limit)); - } - } - } - - if (coord_components > 0 && needs_gl_clamp) { - for (int i = 0; i < MIN2(coord_components, 3); i++) { - if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { - fs_reg chan = coordinate; - chan = offset(chan, bld, i); - set_saturate(true, bld.MOV(chan, chan)); - } - } - } - return coordinate; -} - /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, @@ -203,7 +87,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, const fs_reg dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - texture, texture, fs_reg(), fs_reg(components), fs_reg(0) + texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@ -227,7 +111,6 @@ fs_visitor::emit_texture(ir_texture_opcode op, fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, uint32_t surface, fs_reg surface_reg, uint32_t sampler, @@ -235,38 +118,32 @@ fs_visitor::emit_texture(ir_texture_opcode op, { fs_inst *inst = NULL; - if (op == ir_tg4) { - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - - fs_reg res = vgrf(glsl_type::vec4_type); - this->result = res; - - for (int i=0; i<4; i++) { - bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)); - res = offset(res, bld, 1); - } - return; - } - } - if (op == ir_query_levels) { /* textureQueryLevels() is implemented in terms of TXS so we need to * pass a valid LOD argument. 
*/ assert(lod.file == BAD_FILE); - lod = fs_reg(0u); + lod = brw_imm_ud(0u); } - if (coordinate.file != BAD_FILE) { - /* FINISHME: Texture coordinate rescaling doesn't work with non-constant - * samplers. This should only be a problem with GL_CLAMP on Gen7. + if (op == ir_samples_identical) { + fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1)); + + /* If mcs is an immediate value, it means there is no MCS. In that case + * just return false. */ - coordinate = rescale_texcoord(coordinate, coord_components, is_rect, - sampler); + if (mcs.file == BRW_IMMEDIATE_VALUE) { + bld.MOV(dst, brw_imm_ud(0u)); + } else if ((key_tex->msaa_16 & (1 << sampler))) { + fs_reg tmp = vgrf(glsl_type::uint_type); + bld.OR(tmp, mcs, offset(mcs, bld, 1)); + bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); + } else { + bld.CMP(dst, mcs, brw_imm_ud(0u), BRW_CONDITIONAL_EQ); + } + + this->result = dst; + return; } /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -276,7 +153,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, sample_index, mcs, surface_reg, sampler_reg, offset_value, - fs_reg(coord_components), fs_reg(grad_components) + brw_imm_d(coord_components), brw_imm_d(grad_components) }; enum opcode opcode; @@ -327,8 +204,15 @@ fs_visitor::emit_texture(ir_texture_opcode op, inst->offset = offset_value.ud; if (op == ir_tg4) { - inst->offset |= - gather_channel(gather_component, surface, sampler) << 16; /* M0.2:16-17 */ + if (gather_component == 1 && + key_tex->gather_channel_quirk_mask & (1 << surface)) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. 
+ */ + inst->offset |= 2 << 16; + } else { + inst->offset |= gather_component << 16; + } if (devinfo->gen == 6) emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); @@ -338,7 +222,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { fs_reg depth = offset(dst, bld, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); - bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); + bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); int components = inst->regs_written / (inst->exec_size / 8); @@ -352,7 +236,12 @@ fs_visitor::emit_texture(ir_texture_opcode op, bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0); } - swizzle_result(op, dest_type->vector_elements, dst, sampler); + if (op == ir_query_levels) { + /* # levels is in .w */ + dst = offset(dst, bld, 3); + } + + this->result = dst; } /** @@ -369,7 +258,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) for (int i = 0; i < 4; i++) { fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); /* Convert from UNORM to UINT */ - bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))); + bld.MUL(dst_f, dst_f, brw_imm_f((1 << width) - 1)); bld.MOV(dst, dst_f); if (wa & WA_SIGN) { @@ -377,83 +266,14 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) * shifting the sign bit into place, then shifting back * preserving sign. */ - bld.SHL(dst, dst, fs_reg(32 - width)); - bld.ASR(dst, dst, fs_reg(32 - width)); + bld.SHL(dst, dst, brw_imm_d(32 - width)); + bld.ASR(dst, dst, brw_imm_d(32 - width)); } dst = offset(dst, bld, 1); } } -/** - * Set up the gather channel based on the swizzle, for gather4. 
- */ -uint32_t -fs_visitor::gather_channel(int orig_chan, uint32_t surface, uint32_t sampler) -{ - int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (key_tex->gather_channel_quirk_mask & (1 << surface)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -/** - * Swizzle the result of a texture result. This is necessary for - * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. - */ -void -fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, - fs_reg orig_val, uint32_t sampler) -{ - if (op == ir_query_levels) { - /* # levels is in .w */ - this->result = offset(orig_val, bld, 3); - return; - } - - this->result = orig_val; - - /* txs,lod don't actually sample the texture, so swizzling the result - * makes no sense. - */ - if (op == ir_txs || op == ir_lod || op == ir_tg4) - return; - - if (dest_components == 1) { - /* Ignore DEPTH_TEXTURE_MODE swizzling. */ - } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) { - fs_reg swizzled_result = vgrf(glsl_type::vec4_type); - swizzled_result.type = orig_val.type; - - for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(key_tex->swizzles[sampler], i); - fs_reg l = swizzled_result; - l = offset(l, bld, i); - - if (swiz == SWIZZLE_ZERO) { - bld.MOV(l, fs_reg(0.0f)); - } else if (swiz == SWIZZLE_ONE) { - bld.MOV(l, fs_reg(1.0f)); - } else { - bld.MOV(l, offset(orig_val, bld, - GET_SWZ(key_tex->swizzles[sampler], i))); - } - } - this->result = swizzled_result; - } -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. 
*/ void fs_visitor::emit_dummy_fs() @@ -464,7 +284,7 @@ fs_visitor::emit_dummy_fs() const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F), - fs_reg(color[i])); + brw_imm_f(color[i])); } fs_inst *write; @@ -683,7 +503,7 @@ fs_visitor::emit_alpha_test() fs_reg color = offset(outputs[0], bld, 3); /* f0.1 &= func(color, ref) */ - cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref), + cmp = abld.CMP(bld.null_reg_f(), color, brw_imm_f(key->alpha_test_ref), cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; @@ -716,7 +536,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg sources[] = { color0, color1, src0_alpha, src_depth, dst_depth, src_stencil, - sample_mask, fs_reg(components) + sample_mask, brw_imm_ud(components) }; assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS); fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), @@ -950,12 +770,12 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) fs_reg offset; if (gs_vertex_count.file == IMM) { - per_slot_offsets = fs_reg(output_vertex_size_owords * - gs_vertex_count.ud); + per_slot_offsets = brw_imm_ud(output_vertex_size_owords * + gs_vertex_count.ud); } else { per_slot_offsets = vgrf(glsl_type::int_type); bld.MUL(per_slot_offsets, gs_vertex_count, - fs_reg(output_vertex_size_owords)); + brw_imm_ud(output_vertex_size_owords)); } } @@ -978,7 +798,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) } fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - bld.MOV(zero, fs_reg(0u)); + bld.MOV(zero, brw_imm_ud(0u)); sources[length++] = zero; if (vue_map->slots_valid & VARYING_BIT_LAYER) @@ -1038,7 +858,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) for (unsigned i = 0; i < output_components[varying]; i++) sources[length++] = offset(this->outputs[varying], bld, i); for (unsigned i = 
output_components[varying]; i < 4; i++) - sources[length++] = fs_reg(0); + sources[length++] = brw_imm_d(0); } break; } @@ -1115,11 +935,11 @@ fs_visitor::emit_barrier() const fs_builder pbld = bld.exec_all().group(8, 0); /* Clear the message payload */ - pbld.MOV(payload, fs_reg(0u)); + pbld.MOV(payload, brw_imm_ud(0u)); /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */ fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)); - pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)); + pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u)); /* Emit a gateway "barrier" message using the payload we set up, followed * by a wait instruction. diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index ed0890f430f..149b43ba055 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -75,7 +75,9 @@ brw_codegen_gs_prog(struct brw_context *brw, * every uniform is a float which gets padded to the size of a vec4. 
*/ struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int param_count = gp->program.Base.nir->num_uniforms * 4; + int param_count = gp->program.Base.nir->num_uniforms; + if (!compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + param_count *= 4; prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); @@ -87,7 +89,8 @@ brw_codegen_gs_prog(struct brw_context *brw, prog_data.base.base.nr_image_params = gs->NumImages; brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, - &prog_data.base.base, compiler->scalar_gs); + &prog_data.base.base, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index 4ed95c473cd..cd9f6ef591d 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -694,7 +694,7 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low) high %= 64; low %= 64; - const uint64_t mask = (1ull << (high - low + 1)) - 1; + const uint64_t mask = (~0ull >> (64 - (high - low + 1))); return (inst->data[word] >> low) & mask; } @@ -713,7 +713,7 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value) high %= 64; low %= 64; - const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low; + const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low; /* Make sure the supplied value actually fits in the given bitfield. 
*/ assert((value & (mask >> low)) == value); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7e977e9e727..0410053ce27 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -36,11 +36,6 @@ public: void init(); fs_reg(); - explicit fs_reg(float f); - explicit fs_reg(int32_t i); - explicit fs_reg(uint32_t u); - explicit fs_reg(uint8_t vf[4]); - explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3); fs_reg(struct brw_reg reg); fs_reg(enum brw_reg_file file, int nr); fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type); diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 110e64b979e..e2e66044d3a 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -41,11 +41,6 @@ public: src_reg(enum brw_reg_file file, int nr, const glsl_type *type); src_reg(); - src_reg(float f); - src_reg(uint32_t u); - src_reg(int32_t i); - src_reg(uint8_t vf[4]); - src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3); src_reg(struct brw_reg reg); bool equals(const src_reg &r) const; diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 29911732761..14421d421b6 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, exec_list *ir) { + const struct brw_compiler *compiler = brw->intelScreen->compiler; + int ops = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | LOWER_PACK_UNORM_2x16 | LOWER_UNPACK_UNORM_2x16; - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { + if (compiler->scalar_stage[shader_type]) { ops |= LOWER_UNPACK_UNORM_4x8 | LOWER_UNPACK_SNORM_4x8 | LOWER_PACK_UNORM_4x8 @@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw, * lowering is needed. 
For SOA code, the Half2x16 ops must be * scalarized. */ - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { + if (compiler->scalar_stage[shader_type]) { ops |= LOWER_PACK_HALF_2x16_TO_SPLIT | LOWER_UNPACK_HALF_2x16_TO_SPLIT; } @@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage, struct gl_shader *shader) { struct gl_context *ctx = &brw->ctx; + const struct brw_compiler *compiler = brw->intelScreen->compiler; const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[shader->Stage]; @@ -161,7 +164,7 @@ process_glsl_ir(gl_shader_stage stage, do { progress = false; - if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) { + if (compiler->scalar_stage[shader->Stage]) { brw_do_channel_expressions(shader->ir); brw_do_vector_splitting(shader->ir); } @@ -252,7 +255,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, - is_scalar_shader_stage(compiler, stage)); + compiler->scalar_stage[stage]); _mesa_reference_program(ctx, &prog, NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 12e7c32e424..1f8bfdfa492 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances) } static void -get_fast_clear_rect(struct gl_framebuffer *fb, +get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, struct intel_renderbuffer *irb, struct rect *rect) { unsigned int x_align, y_align; @@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb, */ intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align); x_align *= 16; - y_align *= 32; + + /* SKL+ line alignment requirement for Y-tiled are half those of the prior + * generations. 
+ */ + if (brw->gen >= 9) + y_align *= 16; + else + y_align *= 32; /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): @@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb, * terms of (width,height) of the RT. * * MSAA Width of Clear Rect Height of Clear Rect + * 2X Ceil(1/8*width) Ceil(1/2*height) * 4X Ceil(1/8*width) Ceil(1/2*height) * 8X Ceil(1/2*width) Ceil(1/2*height) + * 16X width Ceil(1/2*height) * * The text "with upper left co-ordinate to coincide with actual * rectangle being cleared" is a little confusing--it seems to imply @@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb, case 8: x_scaledown = 2; break; + case 16: + x_scaledown = 1; + break; default: unreachable("Unexpected sample count for fast clear"); } @@ -347,8 +359,12 @@ is_color_fast_clear_compatible(struct brw_context *brw, } for (int i = 0; i < 4; i++) { - if (color->f[i] != 0.0f && color->f[i] != 1.0f && - _mesa_format_has_color_component(format, i)) { + if (!_mesa_format_has_color_component(format, i)) { + continue; + } + + if (brw->gen < 9 && + color->f[i] != 0.0f && color->f[i] != 1.0f) { return false; } } @@ -357,18 +373,55 @@ is_color_fast_clear_compatible(struct brw_context *brw, /** * Convert the given color to a bitfield suitable for ORing into DWORD 7 of - * SURFACE_STATE. + * SURFACE_STATE (DWORD 12-15 on SKL+). 
*/ -static uint32_t -compute_fast_clear_color_bits(const union gl_color_union *color) +static void +set_fast_clear_color(struct brw_context *brw, + struct intel_mipmap_tree *mt, + const union gl_color_union *color) { - uint32_t bits = 0; - for (int i = 0; i < 4; i++) { - /* Testing for non-0 works for integer and float colors */ - if (color->f[i] != 0.0f) - bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + union gl_color_union override_color = *color; + + /* The sampler doesn't look at the format of the surface when the fast + * clear color is used so we need to implement luminance, intensity and + * missing components manually. + */ + switch (_mesa_get_format_base_format(mt->format)) { + case GL_INTENSITY: + override_color.ui[3] = override_color.ui[0]; + /* flow through */ + case GL_LUMINANCE: + case GL_LUMINANCE_ALPHA: + override_color.ui[1] = override_color.ui[0]; + override_color.ui[2] = override_color.ui[0]; + break; + default: + for (int i = 0; i < 3; i++) { + if (!_mesa_format_has_color_component(mt->format, i)) + override_color.ui[i] = 0; + } + break; + } + + if (!_mesa_format_has_color_component(mt->format, 3)) { + if (_mesa_is_format_integer_color(mt->format)) + override_color.ui[3] = 1; + else + override_color.f[3] = 1.0f; + } + + if (brw->gen >= 9) { + mt->gen9_fast_clear_color = override_color; + } else { + mt->fast_clear_color_value = 0; + for (int i = 0; i < 4; i++) { + /* Testing for non-0 works for integer and float colors */ + if (override_color.f[i] != 0.0f) { + mt->fast_clear_color_value |= + 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + } + } } - return bits; } static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 }; @@ -408,6 +461,55 @@ use_rectlist(struct brw_context *brw, bool enable) brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; } +/** + * Individually fast clear each color buffer attachment. On previous gens this + * isn't required. 
The motivation for this comes from one line (which seems to + * be specific to SKL+). The list item is in section titled _MCS Buffer for + * Render Target(s)_ + * + * "Since only one RT is bound with a clear pass, only one RT can be cleared + * at a time. To clear multiple RTs, multiple clear passes are required." + * + * The code follows the same idea as the resolve code which creates a fake FBO + * to avoid interfering with too much of the GL state. + */ +static void +fast_clear_attachments(struct brw_context *brw, + struct gl_framebuffer *fb, + uint32_t fast_clear_buffers, + struct rect fast_clear_rect) +{ + assert(brw->gen >= 9); + struct gl_context *ctx = &brw->ctx; + + brw_bind_rep_write_shader(brw, (float *) fast_clear_color); + + /* SKL+ also has a resolve mode for compressed render targets and thus more + * bits to let us select the type of resolve. For fast clear resolves, it + * turns out we can use the same value as pre-SKL though. + */ + set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); + + while (fast_clear_buffers) { + int index = ffs(fast_clear_buffers) - 1; + + fast_clear_buffers &= ~(1 << index); + + _mesa_meta_drawbuffers_from_bitfield(1 << index); + + brw_draw_rectlist(ctx, &fast_clear_rect, MAX2(1, fb->MaxNumLayers)); + + /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll + * resolve them eventually. 
+ */ + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + } + + set_fast_clear_op(brw, 0); +} + bool brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, GLbitfield buffers, bool partial_clear) @@ -447,13 +549,15 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, if (brw->gen < 7) clear_type = REP_CLEAR; - if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) + /* Certain formats have unresolved issues with sampling from the MCS + * buffer on Gen9. This disables fast clears altogether for MSRTs until + * we can figure out what's going on. + */ + if (brw->gen >= 9 && irb->mt->num_samples > 1) clear_type = REP_CLEAR; - if (brw->gen >= 9 && clear_type == FAST_CLEAR) { - perf_debug("fast MCS clears are disabled on gen9"); + if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) clear_type = REP_CLEAR; - } /* We can't do scissored fast clears because of the restrictions on the * fast clear rectangle size. 
@@ -503,8 +607,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, switch (clear_type) { case FAST_CLEAR: - irb->mt->fast_clear_color_value = - compute_fast_clear_color_bits(&ctx->Color.ClearColor); + set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor); irb->need_downsample = true; /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the @@ -520,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; irb->need_downsample = true; fast_clear_buffers |= 1 << index; - get_fast_clear_rect(fb, irb, &fast_clear_rect); + get_fast_clear_rect(brw, fb, irb, &fast_clear_rect); break; case REP_CLEAR: @@ -584,12 +687,27 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, use_rectlist(brw, true); layers = MAX2(1, fb->MaxNumLayers); - if (fast_clear_buffers) { + + if (brw->gen >= 9 && fast_clear_buffers) { + fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect); + } else if (fast_clear_buffers) { _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers); brw_bind_rep_write_shader(brw, (float *) fast_clear_color); set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); brw_draw_rectlist(ctx, &fast_clear_rect, layers); set_fast_clear_op(brw, 0); + + /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll + * resolve them eventually. 
+ */ + for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + int index = fb->_ColorDrawBufferIndexes[buf]; + + if ((1 << index) & fast_clear_buffers) + irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + } } if (rep_clear_buffers) { @@ -598,18 +716,6 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, brw_draw_rectlist(ctx, &clear_rect, layers); } - /* Now set the mts we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll - * resolve them eventually. - */ - for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - int index = fb->_ColorDrawBufferIndexes[buf]; - - if ((1 << index) & fast_clear_buffers) - irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; - } - bail_to_meta: /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if @@ -655,8 +761,9 @@ get_resolve_rect(struct brw_context *brw, * * The scaledown factors in the table that follows are related to the * alignment size returned by intel_get_non_msrt_mcs_alignment() by a - * multiplier. For IVB and HSW, we divide by two, for BDW we multiply - * by 8 and 16 and 8 and 8 for SKL. + * multiplier. For IVB and HSW, we divide by two, for BDW we multiply + * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling + * by a factor of 2. */ intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); @@ -702,6 +809,10 @@ brw_meta_resolve_color(struct brw_context *brw, brw_bind_rep_write_shader(brw, (float *) fast_clear_color); + /* SKL+ also has a resolve mode for compressed render targets and thus more + * bits to let us select the type of resolve. 
For fast clear resolves, it + * turns out we can use the same value as pre-SKL though. + */ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 58754adc887..91358d8f389 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -56,8 +56,9 @@ remap_vs_attrs(nir_block *block, void *closure) } static void -brw_nir_lower_inputs(const struct brw_device_info *devinfo, - nir_shader *nir, bool is_scalar) +brw_nir_lower_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar) { switch (nir->stage) { case MESA_SHADER_VERTEX: @@ -170,131 +171,159 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) } } -static void +static bool +should_clone_nir() +{ + static int should_clone = -1; + if (should_clone < 1) + should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false); + + return should_clone; +} + +#define _OPT(do_pass) (({ \ + bool this_progress = true; \ + do_pass \ + nir_validate_shader(nir); \ + if (should_clone_nir()) { \ + nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ + ralloc_free(nir); \ + nir = clone; \ + } \ + this_progress; \ +})) + +#define OPT(pass, ...) _OPT( \ + nir_metadata_set_validation_flag(nir); \ + this_progress = pass(nir ,##__VA_ARGS__); \ + if (this_progress) { \ + progress = true; \ + nir_metadata_check_validation_flag(nir); \ + } \ +) + +#define OPT_V(pass, ...) 
_OPT( \ + pass(nir, ##__VA_ARGS__); \ +) + +static nir_shader * nir_optimize(nir_shader *nir, bool is_scalar) { bool progress; do { progress = false; - nir_lower_vars_to_ssa(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_vars_to_ssa); if (is_scalar) { - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_alu_to_scalar); } - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); + OPT(nir_copy_prop); if (is_scalar) { - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_phis_to_scalar); } - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - progress |= nir_opt_dce(nir); - nir_validate_shader(nir); - progress |= nir_opt_cse(nir); - nir_validate_shader(nir); - progress |= nir_opt_peephole_select(nir); - nir_validate_shader(nir); - progress |= nir_opt_algebraic(nir); - nir_validate_shader(nir); - progress |= nir_opt_constant_folding(nir); - nir_validate_shader(nir); - progress |= nir_opt_dead_cf(nir); - nir_validate_shader(nir); - progress |= nir_opt_remove_phis(nir); - nir_validate_shader(nir); - progress |= nir_opt_undef(nir); - nir_validate_shader(nir); + OPT(nir_copy_prop); + OPT(nir_opt_dce); + OPT(nir_opt_cse); + OPT(nir_opt_peephole_select); + OPT(nir_opt_algebraic); + OPT(nir_opt_constant_folding); + OPT(nir_opt_dead_cf); + OPT(nir_opt_remove_phis); + OPT(nir_opt_undef); } while (progress); + + return nir; } +/* Does some simple lowering and runs the standard suite of optimizations + * + * This is intended to be called more-or-less directly after you get the + * shader out of GLSL or some other source. While it is geared towards i965, + * it is not at all generator-specific except for the is_scalar flag. Even + * there, it is safe to call with is_scalar = false for a shader that is + * intended for the FS backend as long as nir_optimize is called again with + * is_scalar = true to scalarize everything prior to code gen. 
+ */ nir_shader * -brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - const struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar) +brw_preprocess_nir(nir_shader *nir, bool is_scalar) { - struct gl_context *ctx = &brw->ctx; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - nir_shader *nir; + bool progress; /* Written by OPT and OPT_V */ + (void)progress; - /* First, lower the GLSL IR or Mesa IR to NIR */ - if (shader_prog) { - nir = glsl_to_nir(shader_prog, stage, options); - } else { - nir = prog_to_nir(prog, options); - nir_convert_to_ssa(nir); /* turn registers into SSA */ - } - nir_validate_shader(nir); + if (nir->stage == MESA_SHADER_GEOMETRY) + OPT(nir_lower_gs_intrinsics); - brw_preprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); + static const nir_lower_tex_options tex_options = { + .lower_txp = ~0, + }; - if (shader_prog) { - nir_lower_samplers(nir, shader_prog); - nir_validate_shader(nir); + OPT(nir_lower_tex, &tex_options); + OPT(nir_normalize_cubemap_coords); - nir_lower_atomics(nir, shader_prog); - nir_validate_shader(nir); - } + OPT(nir_lower_global_vars_to_local); - brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); + OPT(nir_split_var_copies); - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s NIR shader:\n", - _mesa_shader_stage_to_abbrev(nir->stage)); + nir = nir_optimize(nir, is_scalar); - return nir; -} + /* Lower a bunch of stuff */ + OPT_V(nir_lower_var_copies); -void -brw_preprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar) -{ - static const nir_lower_tex_options tex_options = { - .lower_txp = ~0, - }; + /* Get rid of split copies */ + nir = nir_optimize(nir, is_scalar); - if (nir->stage == 
MESA_SHADER_GEOMETRY) { - nir_lower_gs_intrinsics(nir); - nir_validate_shader(nir); - } + OPT(nir_remove_dead_variables); - nir_lower_global_vars_to_local(nir); - nir_validate_shader(nir); + return nir; +} - nir_lower_tex(nir, &tex_options); - nir_validate_shader(nir); +/* Lowers inputs, outputs, uniforms, and samplers for i965 + * + * This function does all of the standard lowering prior to post-processing. + * The lowering done is highly gen, stage, and backend-specific. The + * shader_prog parameter is optional and is used only for lowering sampler + * derefs and atomics for GLSL shaders. + */ +nir_shader * +brw_lower_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + bool is_scalar) +{ + bool progress; /* Written by OPT and OPT_V */ + (void)progress; - nir_normalize_cubemap_coords(nir); - nir_validate_shader(nir); + OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); + OPT_V(brw_nir_lower_outputs, is_scalar); + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + is_scalar ? type_size_scalar : type_size_vec4); + OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); - nir_split_var_copies(nir); - nir_validate_shader(nir); + if (shader_prog) { + OPT_V(nir_lower_samplers, shader_prog); + } - nir_optimize(nir, is_scalar); + OPT(nir_lower_system_values); - /* Lower a bunch of stuff */ - nir_lower_var_copies(nir); - nir_validate_shader(nir); + if (shader_prog) { + OPT_V(nir_lower_atomics, shader_prog); + } - /* Get rid of split copies */ - nir_optimize(nir, is_scalar); + return nir_optimize(nir, is_scalar); } -void +/* Prepare the given shader for codegen + * + * This function is intended to be called right before going into the actual + * backend and is highly backend-specific. Also, once this function has been + * called on a shader, it will no longer be in SSA form so most optimizations + * will not work. 
+ */ +nir_shader * brw_postprocess_nir(nir_shader *nir, const struct brw_device_info *devinfo, bool is_scalar) @@ -302,40 +331,21 @@ brw_postprocess_nir(nir_shader *nir, bool debug_enabled = (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); - brw_nir_lower_inputs(devinfo, nir, is_scalar); - brw_nir_lower_outputs(nir, is_scalar); - nir_assign_var_locations(&nir->uniforms, - &nir->num_uniforms, - is_scalar ? type_size_scalar : type_size_vec4); - nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4); - nir_validate_shader(nir); - - nir_remove_dead_variables(nir); - nir_validate_shader(nir); - - nir_lower_system_values(nir); - nir_validate_shader(nir); - - nir_optimize(nir, is_scalar); + bool progress; /* Written by OPT and OPT_V */ + (void)progress; if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ - brw_nir_opt_peephole_ffma(nir); - nir_validate_shader(nir); + OPT(brw_nir_opt_peephole_ffma); } - nir_opt_algebraic_late(nir); - nir_validate_shader(nir); + OPT(nir_opt_algebraic_late); - nir_lower_locals_to_regs(nir); - nir_validate_shader(nir); + OPT(nir_lower_locals_to_regs); - nir_lower_to_source_mods(nir); - nir_validate_shader(nir); - nir_copy_prop(nir); - nir_validate_shader(nir); - nir_opt_dce(nir); - nir_validate_shader(nir); + OPT_V(nir_lower_to_source_mods); + OPT(nir_copy_prop); + OPT(nir_opt_dce); if (unlikely(debug_enabled)) { /* Re-index SSA defs so we print more sensible numbers. */ @@ -349,15 +359,11 @@ brw_postprocess_nir(nir_shader *nir, nir_print_shader(nir, stderr); } - nir_convert_from_ssa(nir, true); - nir_validate_shader(nir); + OPT_V(nir_convert_from_ssa, true); if (!is_scalar) { - nir_move_vec_src_uses_to_dest(nir); - nir_validate_shader(nir); - - nir_lower_vec_to_movs(nir); - nir_validate_shader(nir); + OPT_V(nir_move_vec_src_uses_to_dest); + OPT(nir_lower_vec_to_movs); } /* This is the last pass we run before we start emitting stuff. 
It @@ -375,13 +381,83 @@ brw_postprocess_nir(nir_shader *nir, _mesa_shader_stage_to_string(nir->stage)); nir_print_shader(nir, stderr); } + + return nir; +} + +nir_shader * +brw_create_nir(struct brw_context *brw, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + gl_shader_stage stage, + bool is_scalar) +{ + struct gl_context *ctx = &brw->ctx; + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; + const nir_shader_compiler_options *options = + ctx->Const.ShaderCompilerOptions[stage].NirOptions; + bool progress; + nir_shader *nir; + + /* First, lower the GLSL IR or Mesa IR to NIR */ + if (shader_prog) { + nir = glsl_to_nir(shader_prog, stage, options); + } else { + nir = prog_to_nir(prog, options); + OPT_V(nir_convert_to_ssa); /* turn registers into SSA */ + } + nir_validate_shader(nir); + + (void)progress; + + nir = brw_preprocess_nir(nir, is_scalar); + nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar); + + return nir; +} + +nir_shader * +brw_nir_apply_sampler_key(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct brw_sampler_prog_key_data *key_tex, + bool is_scalar) +{ + nir_lower_tex_options tex_options = { 0 }; + + /* Iron Lake and prior require lowering of all rectangle textures */ + if (devinfo->gen < 6) + tex_options.lower_rect = true; + + /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ + if (devinfo->gen < 8) { + tex_options.saturate_s = key_tex->gl_clamp_mask[0]; + tex_options.saturate_t = key_tex->gl_clamp_mask[1]; + tex_options.saturate_r = key_tex->gl_clamp_mask[2]; + } + + /* Prior to Haswell, we have to fake texture swizzle */ + for (unsigned s = 0; s < MAX_SAMPLERS; s++) { + if (key_tex->swizzles[s] == SWIZZLE_NOOP) + continue; + + tex_options.swizzle_result |= (1 << s); + for (unsigned c = 0; c < 4; c++) + tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); + } + + if (nir_lower_tex(nir, &tex_options)) { + nir_validate_shader(nir); + nir = 
nir_optimize(nir, is_scalar); + } + + return nir; } enum brw_reg_type brw_type_for_nir_type(nir_alu_type type) { switch (type) { - case nir_type_unsigned: + case nir_type_uint: return BRW_REGISTER_TYPE_UD; case nir_type_bool: case nir_type_int: @@ -408,7 +484,7 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type) case nir_type_int: return GLSL_TYPE_INT; - case nir_type_unsigned: + case nir_type_uint: return GLSL_TYPE_UINT; default: diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index d259777e1c9..0a8a5a280b1 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -81,19 +81,25 @@ nir_shader *brw_create_nir(struct brw_context *brw, gl_shader_stage stage, bool is_scalar); +nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar); +nir_shader *brw_lower_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + bool is_scalar); +nir_shader *brw_postprocess_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar); + + +nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct brw_sampler_prog_key_data *key, + bool is_scalar); + enum brw_reg_type brw_type_for_nir_type(nir_alu_type type); enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); -void -brw_preprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar); -void -brw_postprocess_nir(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar); - void brw_nir_setup_glsl_uniforms(nir_shader *shader, struct gl_shader_program *shader_prog, const struct gl_program *prog, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6433dec9041..3da8e9e8a97 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -126,6 +126,7 @@ brwProgramStringNotify(struct 
gl_context *ctx, struct gl_program *prog) { struct brw_context *brw = brw_context(ctx); + const struct brw_compiler *compiler = brw->intelScreen->compiler; switch (target) { case GL_FRAGMENT_PROGRAM_ARB: { @@ -165,7 +166,7 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); + compiler->scalar_stage[MESA_SHADER_VERTEX]); brw_vs_precompile(ctx, NULL, prog); break; @@ -343,6 +344,8 @@ brw_report_shader_time(struct brw_context *brw) switch (type) { case ST_VS: + case ST_TCS: + case ST_TES: case ST_GS: case ST_FS8: case ST_FS16: @@ -369,6 +372,8 @@ brw_report_shader_time(struct brw_context *brw) switch (type) { case ST_VS: + case ST_TCS: + case ST_TES: case ST_GS: case ST_FS8: case ST_FS16: @@ -406,6 +411,12 @@ brw_report_shader_time(struct brw_context *brw) case ST_VS: stage = "vs"; break; + case ST_TCS: + stage = "tcs"; + break; + case ST_TES: + stage = "tes"; + break; case ST_GS: stage = "gs"; break; @@ -429,6 +440,8 @@ brw_report_shader_time(struct brw_context *brw) fprintf(stderr, "\n"); print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total); + print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total); + print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total); print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total); print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total); print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total); diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 3da83b43b5d..fa912c96c36 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -43,7 +43,6 @@ #define BRW_REG_H #include <stdbool.h> -#include "main/imports.h" #include "main/compiler.h" #include "main/macros.h" #include "program/prog_instruction.h" @@ -619,57 +618,37 @@ static inline 
struct brw_reg brw_imm_v(unsigned v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_8; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; imm.ud = v; return imm; } +/** Construct vector of eight unsigned half-byte values */ +static inline struct brw_reg +brw_imm_uv(unsigned uv) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV); + imm.ud = uv; + return imm; +} + /** Construct vector of four 8-bit float values */ static inline struct brw_reg brw_imm_vf(unsigned v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; imm.ud = v; return imm; } -/** - * Convert an integer into a "restricted" 8-bit float, used in vector - * immediates. The 8-bit floating point format has a sign bit, an - * excess-3 3-bit exponent, and a 4-bit mantissa. All integer values - * from -31 to 31 can be represented exactly. - */ -static inline uint8_t -int_to_float8(int x) -{ - if (x == 0) { - return 0; - } else if (x < 0) { - return 1 << 7 | int_to_float8(-x); - } else { - const unsigned exponent = _mesa_logbase2(x); - const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent); - assert(exponent <= 4); - return (exponent + 3) << 4 | mantissa; - } -} - -/** - * Construct a floating-point packed vector immediate from its integer - * values. 
\sa int_to_float8() - */ static inline struct brw_reg -brw_imm_vf4(int v0, int v1, int v2, int v3) +brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) { - return brw_imm_vf((int_to_float8(v0) << 0) | - (int_to_float8(v1) << 8) | - (int_to_float8(v2) << 16) | - (int_to_float8(v3) << 24)); + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); + return imm; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1f3ae7ab5e6..2f0e8b680ab 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -72,22 +72,6 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) va_end(args); } -bool -is_scalar_shader_stage(const struct brw_compiler *compiler, int stage) -{ - switch (stage) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - return true; - case MESA_SHADER_GEOMETRY: - return compiler->scalar_gs; - case MESA_SHADER_VERTEX: - return compiler->scalar_vs; - default: - return false; - } -} - struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { @@ -100,11 +84,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); - if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) - compiler->scalar_vs = true; - - if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false)) - compiler->scalar_gs = true; + compiler->scalar_stage[MESA_SHADER_VERTEX] = + devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false); + compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; + compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; 
nir_shader_compiler_options *nir_options = rzalloc(compiler, nir_shader_compiler_options); @@ -139,7 +124,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; compiler->glsl_compiler_options[i].LowerClipDistance = true; - bool is_scalar = is_scalar_shader_stage(compiler, i); + bool is_scalar = compiler->scalar_stage[i]; compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; @@ -154,6 +139,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + return compiler; } @@ -557,6 +545,8 @@ brw_instruction_name(enum opcode op) return "barrier"; case SHADER_OPCODE_MULH: return "mulh"; + case SHADER_OPCODE_MOV_INDIRECT: + return "mov_indirect"; } unreachable("not reached"); @@ -574,16 +564,12 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) switch (type) { case BRW_REGISTER_TYPE_UD: case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: /* Nothing to do. 
*/ return false; - case BRW_REGISTER_TYPE_UW: - sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX); - break; - case BRW_REGISTER_TYPE_W: - sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX); - break; case BRW_REGISTER_TYPE_F: sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index c4a37187ce2..9555406c777 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -252,8 +252,6 @@ int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_vec4_times_4(const struct glsl_type *type); -bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage); - #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 0d49ab7b431..69eed4bc629 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -32,8 +32,8 @@ /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. */ -#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ - [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf}, +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \ + [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, #sf}, #define Y 0 #define x 999 @@ -61,6 +61,7 @@ * VB - Input Vertex Buffer * SO - Steamed Output Vertex Buffers (transform feedback) * color - Color Processing + * ccs_e - Lossless Compression Support (gen9+ only) * sf - Surface Format * * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. @@ -71,257 +72,258 @@ * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch). * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping. * - VOL4_Part1 section 3.9.11 Render Target Write. 
+ * - Render Target Surface Types [SKL+] */ const struct brw_surface_format_info surface_formats[] = { -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT) - SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT) - 
SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB) -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB) - SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT) - SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT) - SF( Y, 50, Y, x, 
x, x, x, x, x, A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32_SNORM) -/* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT) - SF(45, 45, 
x, x, x, x, x, x, x, L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED) -/* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) - SF( x, x, x, x, x, x, x, x, x, L8A8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, Y8_SNORM) - SF( x, x, x, x, x, x, x, x, x, L8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8_SINT) - SF( x, x, x, x, x, x, x, x, x, I8_UINT) - SF( x, x, x, x, x, x, x, x, x, I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R1_UINT) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, 
x, x, BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB) -/* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, x, x, x, x, x, FXT1) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, EAC_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT) - SF( x, x, x, x, x, x, x, x, x, R32_SFIXED) - SF( x, x, x, 
x, x, x, x, x, x, R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, 
ASTC_LDR_2D_8x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT) + SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, 
Y, x, Y, Y, x, 90, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, x, x, x, 60, x, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, Y, Y, x, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, 
x, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, x, x, x, x, 60, 90, B8G8R8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM) + 
SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT) + SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM) + SF( x, x, x, x, x, x, Y, 
x, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8_SINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_SINT) + SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R1_UINT) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB) +/* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, x, x, x, x, x, x, FXT1) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, 
x, x, R16G16B16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, EAC_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT) + SF(80, 80, x, x, x, x, x, x, x, x, 
ASTC_LDR_2D_4x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) }; #undef x #undef Y @@ -771,6 +773,26 @@ brw_render_target_supported(struct brw_context *brw, return brw->format_supported_as_render_target[format]; } +/* + * True if the underlying hardware format can support lossless color + * compression. 
+ */ +bool +brw_losslessly_compressible_format(struct brw_context *brw, + uint32_t brw_format) +{ + const struct brw_surface_format_info * const sinfo = + &surface_formats[brw_format]; + const int gen = brw->gen * 10; + + assert(brw->gen >= 9); + + if (gen >= sinfo->lossless_compression) + return true; + + return false; +} + GLuint translate_tex_format(struct brw_context *brw, mesa_format mesa_format, diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h index 5c7b60e680b..a5cd49f5260 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.h +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h @@ -34,6 +34,7 @@ struct brw_surface_format_info { int input_vb; int streamed_output_vb; int color_processing; + int lossless_compression; const char *name; }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index a086b43e11a..ae3cf728443 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -71,51 +71,6 @@ src_reg::src_reg() init(); } -src_reg::src_reg(float f) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_F; - this->f = f; -} - -src_reg::src_reg(uint32_t u) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_UD; - this->ud = u; -} - -src_reg::src_reg(int32_t i) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_D; - this->d = i; -} - -src_reg::src_reg(uint8_t vf[4]) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->ud, vf, sizeof(unsigned)); -} - -src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) -{ - init(); - - this->file = IMM; - this->type = BRW_REGISTER_TYPE_VF; - this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); -} - src_reg::src_reg(struct brw_reg reg) : backend_reg(reg) { @@ -382,7 +337,9 @@ vec4_visitor::opt_vector_float() remaining_channels &= ~inst->dst.writemask; if (remaining_channels == 
0) { - vec4_instruction *mov = MOV(inst->dst, imm); + unsigned vf; + memcpy(&vf, imm, sizeof(vf)); + vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf)); mov->dst.type = BRW_REGISTER_TYPE_F; mov->dst.writemask = WRITEMASK_XYZW; inst->insert_after(block, mov); @@ -657,13 +614,13 @@ vec4_visitor::opt_algebraic() inst->opcode = BRW_OPCODE_MOV; switch (inst->src[0].type) { case BRW_REGISTER_TYPE_F: - inst->src[0] = src_reg(0.0f); + inst->src[0] = brw_imm_f(0.0f); break; case BRW_REGISTER_TYPE_D: - inst->src[0] = src_reg(0); + inst->src[0] = brw_imm_d(0); break; case BRW_REGISTER_TYPE_UD: - inst->src[0] = src_reg(0u); + inst->src[0] = brw_imm_ud(0u); break; default: unreachable("not reached"); @@ -1232,7 +1189,7 @@ vec4_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = src_reg(0); + inst->src[0] = brw_imm_d(0); inst->force_writemask_all = true; progress = true; } @@ -1701,7 +1658,7 @@ vec4_visitor::emit_shader_time_end() */ src_reg reset_end = shader_end_time; reset_end.swizzle = BRW_SWIZZLE_ZZZZ; - vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u))); + vec4_instruction *test = emit(AND(dst_null_ud(), reset_end, brw_imm_ud(1u))); test->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); @@ -1715,12 +1672,12 @@ vec4_visitor::emit_shader_time_end() * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. 
*/ - emit(ADD(diff, src_reg(diff), src_reg(-2u))); + emit(ADD(diff, src_reg(diff), brw_imm_ud(-2u))); emit_shader_time_write(0, src_reg(diff)); - emit_shader_time_write(1, src_reg(1u)); + emit_shader_time_write(1, brw_imm_ud(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(2, src_reg(1u)); + emit_shader_time_write(2, brw_imm_ud(1u)); emit(BRW_OPCODE_ENDIF); } @@ -1736,7 +1693,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) offset.type = BRW_REGISTER_TYPE_UD; int index = shader_time_index * 3 + shader_time_subindex; - emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); + emit(MOV(offset, brw_imm_d(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; emit(MOV(time, value)); @@ -1762,11 +1719,6 @@ vec4_visitor::convert_to_hw_regs() reg.negate = src.negate; break; - case IMM: - reg = brw_imm_reg(src.type); - reg.ud = src.ud; - break; - case UNIFORM: reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + (src.nr + src.reg_offset) / 2, @@ -1783,6 +1735,7 @@ vec4_visitor::convert_to_hw_regs() case ARF: case FIXED_GRF: + case IMM: continue; case BAD_FILE: @@ -1978,13 +1931,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + compiler->scalar_stage[MESA_SHADER_VERTEX]); + shader = brw_postprocess_nir(shader, compiler->devinfo, + compiler->scalar_stage[MESA_SHADER_VERTEX]); + const unsigned *assembly = NULL; unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read); @@ -2002,7 +1961,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, * Read Length" 
as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in * vec4 mode, the hardware appears to wedge unless we read something. */ - if (compiler->scalar_vs) + if (compiler->scalar_stage[MESA_SHADER_VERTEX]) prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2); else prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2); @@ -2021,7 +1980,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, else prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); - if (compiler->scalar_vs) { + if (compiler->scalar_stage[MESA_SHADER_VERTEX]) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 52d68c5a33d..f94f7128a07 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -276,14 +276,9 @@ public: uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); - uint32_t gather_channel(unsigned gather_component, - uint32_t surface, uint32_t sampler); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, src_reg sampler); void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); - void swizzle_result(ir_texture_opcode op, dst_reg dest, - src_reg orig_val, uint32_t sampler, - const glsl_type *dest_type); void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h index a76a4ce4639..be1427c7db7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h @@ -484,7 +484,7 @@ namespace brw { const dst_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); - ADD(one_minus_a, negate(a), src_reg(1.0f)); + ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); return ADD(dst, 
src_reg(x_times_one_minus_a), src_reg(y_times_a)); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 1a09f76a20c..b13d36e2c7d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -30,6 +30,7 @@ #include "brw_vec4_gs_visitor.h" #include "gen6_gs_visitor.h" #include "brw_fs.h" +#include "brw_nir.h" namespace brw { @@ -153,7 +154,7 @@ vec4_gs_visitor::emit_prolog() */ this->current_annotation = "clear r0.2"; dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u); + vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u)); inst->force_writemask_all = true; /* Create a virtual register to hold the vertex count */ @@ -161,7 +162,7 @@ vec4_gs_visitor::emit_prolog() /* Initialize the vertex_count register to 0 */ this->current_annotation = "initialize vertex_count"; - inst = emit(MOV(dst_reg(this->vertex_count), 0u)); + inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u))); inst->force_writemask_all = true; if (c->control_data_header_size_bits > 0) { @@ -176,7 +177,7 @@ vec4_gs_visitor::emit_prolog() */ if (c->control_data_header_size_bits <= 32) { this->current_annotation = "initialize control data bits"; - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } } @@ -274,7 +275,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf) vec4_instruction *inst = emit(MOV(mrf_reg, r0)); inst->force_writemask_all = true; emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, - (uint32_t) gs_prog_data->output_vertex_size_hwords); + brw_imm_ud(gs_prog_data->output_vertex_size_hwords)); } @@ -354,11 +355,12 @@ vec4_gs_visitor::emit_control_data_bits() src_reg dword_index(this, glsl_type::uint_type); if (urb_write_flags) { src_reg 
prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + emit(ADD(dst_reg(prev_count), this->vertex_count, + brw_imm_ud(0xffffffffu))); unsigned log2_bits_per_vertex = _mesa_fls(c->control_data_bits_per_vertex); emit(SHR(dst_reg(dword_index), prev_count, - (uint32_t) (6 - log2_bits_per_vertex))); + brw_imm_ud(6 - log2_bits_per_vertex))); } /* Start building the URB write message. The first MRF gets a copy of @@ -375,8 +377,9 @@ vec4_gs_visitor::emit_control_data_bits() * the appropriate OWORD within the control data header. */ src_reg per_slot_offset(this, glsl_type::uint_type); - emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u))); + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, + brw_imm_ud(1u)); } if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { @@ -388,10 +391,10 @@ vec4_gs_visitor::emit_control_data_bits() * together. 
*/ src_reg channel(this, glsl_type::uint_type); - inst = emit(AND(dst_reg(channel), dword_index, 3u)); + inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u))); inst->force_writemask_all = true; src_reg one(this, glsl_type::uint_type); - inst = emit(MOV(dst_reg(one), 1u)); + inst = emit(MOV(dst_reg(one), brw_imm_ud(1u))); inst->force_writemask_all = true; src_reg channel_mask(this, glsl_type::uint_type); inst = emit(SHL(dst_reg(channel_mask), one, channel)); @@ -441,11 +444,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) /* reg::sid = stream_id */ src_reg sid(this, glsl_type::uint_type); - emit(MOV(dst_reg(sid), stream_id)); + emit(MOV(dst_reg(sid), brw_imm_ud(stream_id))); /* reg:shift_count = 2 * (vertex_count - 1) */ src_reg shift_count(this, glsl_type::uint_type); - emit(SHL(dst_reg(shift_count), this->vertex_count, 1u)); + emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u))); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -503,8 +506,8 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * vertex_count & (32 / bits_per_vertex - 1) == 0 */ vec4_instruction *inst = - emit(AND(dst_null_d(), this->vertex_count, - (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); + emit(AND(dst_null_ud(), this->vertex_count, + brw_imm_ud(32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); @@ -512,7 +515,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we skip emitting them. 
*/ - emit(CMP(dst_null_d(), this->vertex_count, 0u, + emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); @@ -525,7 +528,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ - inst = emit(MOV(dst_reg(this->control_data_bits), 0u)); + inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } emit(BRW_OPCODE_ENDIF); @@ -586,9 +589,9 @@ vec4_gs_visitor::gs_end_primitive() /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */ src_reg one(this, glsl_type::uint_type); - emit(MOV(dst_reg(one), 1u)); + emit(MOV(dst_reg(one), brw_imm_ud(1u))); src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu))); src_reg mask(this, glsl_type::uint_type); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this @@ -604,7 +607,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const nir_shader *shader, + const nir_shader *src_shader, struct gl_shader_program *shader_prog, int shader_time_index, unsigned *final_assembly_size, @@ -614,6 +617,12 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, memset(&c, 0, sizeof(c)); c.key = *key; + nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + shader = brw_postprocess_nir(shader, compiler->devinfo, + compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + prog_data->include_primitive_id = (shader->info.inputs_read & 
VARYING_BIT_PRIMITIVE_ID) != 0; @@ -773,7 +782,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) - return false; + return NULL; /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and @@ -819,7 +828,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, brw_print_vue_map(stderr, &prog_data->base.vue_map); } - if (compiler->scalar_gs) { + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) { /* TODO: Support instanced GS. We have basically no tests... */ assert(prog_data->invocations == 1); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index bf098b41590..260b515ad42 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -122,7 +122,7 @@ vec4_visitor::nir_setup_inputs() { nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); for (unsigned i = 0; i < nir->num_inputs; i++) { - nir_inputs[i] = dst_reg(); + nir_inputs[i] = src_reg(); } nir_foreach_variable(var, &nir->inputs) { @@ -373,7 +373,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) } reg.writemask = writemask; - emit(MOV(reg, src_reg(instr->value.i[i]))); + emit(MOV(reg, brw_imm_d(instr->value.i[i]))); remaining &= ~writemask; } @@ -444,10 +444,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) inst->base_mrf = 2; inst->mlen = 1; /* always at least one */ - inst->src[1] = src_reg(index); + inst->src[1] = brw_imm_ud(index); /* MRF for the first parameter */ - src_reg lod = src_reg(0); + src_reg lod = brw_imm_d(0); int param_base = inst->base_mrf; int writemask = WRITEMASK_X; emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); @@ -471,12 +471,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (const_uniform_block) { unsigned index = 
prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); brw_mark_surface_used(&prog_data->base, @@ -491,7 +491,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1))); } else { const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes))); + emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes))); } /* Value */ @@ -566,7 +566,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (skipped_channels > 0) { if (!has_indirect) { const_offset_bytes += 4 * skipped_channels; - offset_reg = src_reg(const_offset_bytes); + offset_reg = brw_imm_ud(const_offset_bytes); } else { emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(4 * skipped_channels))); @@ -614,13 +614,13 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) if (const_uniform_block) { unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. 
It would be nice to provide @@ -637,7 +637,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1))); } else { const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes))); + emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes)))); } /* Read the vector */ @@ -762,7 +762,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ const unsigned index = prog_data->base.binding_table.ubo_start + const_block_index->u[0]; - surf_index = src_reg(index); + surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { /* The block index is not a constant. Evaluate the index expression @@ -772,7 +772,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int, instr->num_components), - src_reg(prog_data->base.binding_table.ubo_start))); + brw_imm_ud(prog_data->base.binding_table.ubo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. 
It would be nice to provide @@ -787,11 +787,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg offset; if (!has_indirect) { - offset = src_reg(const_offset / 16); + offset = brw_imm_ud(const_offset / 16); } else { offset = src_reg(this, glsl_type::uint_type); emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1), - src_reg(4u))); + brw_imm_ud(4u))); } src_reg packed_consts = src_reg(this, glsl_type::vec4_type); @@ -848,12 +848,12 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) if (const_surface) { unsigned surf_index = prog_data->base.binding_table.ssbo_start + const_surface->u[0]; - surface = src_reg(surf_index); + surface = brw_imm_ud(surf_index); brw_mark_surface_used(&prog_data->base, surf_index); } else { surface = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - src_reg(prog_data->base.binding_table.ssbo_start))); + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); /* Assume this may touch any UBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. 
@@ -1174,8 +1174,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; break; } @@ -1192,8 +1192,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; } @@ -1235,11 +1235,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_f2b: - emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); break; case nir_op_i2b: - emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); break; case nir_op_fnoise1_1: @@ -1321,9 +1321,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * subtract the result from 31 to convert the MSB count into an LSB count. */ src_reg src(dst); - emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); - inst = emit(ADD(dst, src, src_reg(31))); + inst = emit(ADD(dst, src, brw_imm_d(31))); inst->predicate = BRW_PREDICATE_NORMAL; inst->src[0].negate = true; break; @@ -1364,13 +1364,13 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not * zero. 
*/ - emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); op[0].type = BRW_REGISTER_TYPE_UD; dst.type = BRW_REGISTER_TYPE_UD; - emit(AND(dst, op[0], src_reg(0x80000000u))); + emit(AND(dst, op[0], brw_imm_ud(0x80000000u))); - inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u))); + inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u))); inst->predicate = BRW_PREDICATE_NORMAL; dst.type = BRW_REGISTER_TYPE_F; @@ -1385,9 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * -> non-negative val generates 0x00000000. * Predicated OR sets 1 if val is positive. */ - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G)); - emit(ASR(dst, op[0], src_reg(31))); - inst = emit(OR(dst, src_reg(dst), src_reg(1))); + emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G)); + emit(ASR(dst, op[0], brw_imm_d(31))); + inst = emit(OR(dst, src_reg(dst), brw_imm_d(1))); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -1418,7 +1418,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_bcsel: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]); switch (dst.writemask) { case WRITEMASK_X: @@ -1465,10 +1465,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0), + emit(CMP(dst_null_d(), swizzle(op[0], swiz), brw_imm_d(0), BRW_CONDITIONAL_NZ)); - emit(MOV(dst, src_reg(0))); - inst = emit(MOV(dst, src_reg(~0))); + emit(MOV(dst, brw_imm_d(0))); + inst = emit(MOV(dst, brw_imm_d(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; } @@ -1502,7 +1502,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { dst_reg masked = dst_reg(this, glsl_type::int_type); 
masked.writemask = dst.writemask; - emit(AND(masked, src_reg(dst), src_reg(1))); + emit(AND(masked, src_reg(dst), brw_imm_d(1))); src_reg masked_neg = src_reg(masked); masked_neg.negate = true; emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg)); @@ -1551,6 +1551,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop) case nir_texop_txf_ms: op = ir_txf_ms; break; case nir_texop_txl: op = ir_txl; break; case nir_texop_txs: op = ir_txs; break; + case nir_texop_samples_identical: op = ir_samples_identical; break; default: unreachable("unknown texture opcode"); } @@ -1566,7 +1567,7 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type, return glsl_type::vec(components); case nir_type_int: return glsl_type::ivec(components); - case nir_type_unsigned: + case nir_type_uint: return glsl_type::uvec(components); case nir_type_bool: return glsl_type::bvec(components); @@ -1582,8 +1583,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - src_reg texture_reg = src_reg(texture); - src_reg sampler_reg = src_reg(sampler); + src_reg texture_reg = brw_imm_ud(texture); + src_reg sampler_reg = brw_imm_ud(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; src_reg shadow_comparitor; @@ -1597,17 +1598,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_tex_instr_dest_size(instr)); dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - if (instr->op == nir_texop_tg4) { - int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 
1.0f : 0.0f))); - return; - } - } - /* Load the texture operation sources */ for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { @@ -1622,6 +1612,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: + case nir_texop_samples_identical: coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, src_size); coord_type = glsl_type::ivec(src_size); @@ -1661,14 +1652,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) case nir_tex_src_ms_index: { sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1); - assert(coord_type != NULL); - if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << texture)) { - mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg); - } else { - mcs = src_reg(0u); - } - mcs = retype(mcs, BRW_REGISTER_TYPE_UD); break; } @@ -1693,7 +1676,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), src, src_reg(texture))); + emit(ADD(dst_reg(temp), src, brw_imm_ud(texture))); texture_reg = emit_uniformize(temp); break; } @@ -1702,7 +1685,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), src, src_reg(sampler))); + emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler))); sampler_reg = emit_uniformize(temp); break; } @@ -1718,6 +1701,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) } } + if (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical) { + assert(coord_type != NULL); + if (devinfo->gen >= 7 && + key_tex->compressed_multisample_layout_mask & (1 << sampler)) { + mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg); 
+ } else { + mcs = brw_imm_ud(0u); + } + } + uint32_t constant_offset = 0; for (unsigned i = 0; i < 3; i++) { if (instr->const_offset[i] != 0) { @@ -1727,8 +1721,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) } /* Stuff the channel select bits in the top of the texture offset */ - if (instr->op == nir_texop_tg4) - constant_offset |= gather_channel(instr->component, texture, sampler) << 16; + if (instr->op == nir_texop_tg4) { + if (instr->component == 1 && + (key_tex->gather_channel_quirk_mask & (1 << texture))) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. + */ + constant_offset |= 2 << 16; + } else { + constant_offset |= instr->component << 16; + } + } ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp index a7c286d3ac1..28002c56cdc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp @@ -71,7 +71,7 @@ namespace { bld.MOV(writemask(tmp, mask), src); if (n < 4) - bld.MOV(writemask(tmp, ~mask), 0); + bld.MOV(writemask(tmp, ~mask), brw_imm_d(0)); return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1); } @@ -143,7 +143,7 @@ namespace brw { /* Emit the message send instruction. */ const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); vec4_instruction *inst = - bld.emit(op, dst, src_reg(payload), usurface, arg); + bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg)); inst->mlen = sz; inst->regs_written = ret_sz; inst->header_size = header_sz; @@ -235,7 +235,7 @@ namespace brw { const vec4_builder ubld = bld.exec_all(); const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(dst, src_reg(0)); + ubld.MOV(dst, brw_imm_d(0)); if (bld.shader->devinfo->gen == 7 && !bld.shader->devinfo->is_haswell) { @@ -243,7 +243,7 @@ namespace brw { * have no SIMD4x2 variant. 
We only use the two X channels * in that case, mask everything else out. */ - ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11)); + ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11)); } return src_reg(dst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6b8798da71c..caf1ee02bf0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -408,7 +408,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * You should inspect the disasm output in order to verify that the MOV is * not optimized away. */ - emit(MOV(tmp_dst, src_reg(0x12345678u))); + emit(MOV(tmp_dst, brw_imm_ud(0x12345678u))); #endif /* Give tmp the form below, where "." means untouched. @@ -427,7 +427,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) * 0xhhhh0000 */ tmp_src.swizzle = BRW_SWIZZLE_YYYY; - emit(SHL(dst, tmp_src, src_reg(16u))); + emit(SHL(dst, tmp_src, brw_imm_ud(16u))); /* Finally, give the write-channels of dst the form of packHalf2x16's * output: @@ -466,10 +466,10 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) src_reg tmp_src(tmp_dst); tmp_dst.writemask = WRITEMASK_X; - emit(AND(tmp_dst, src0, src_reg(0xffffu))); + emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu))); tmp_dst.writemask = WRITEMASK_Y; - emit(SHR(tmp_dst, src0, src_reg(16u))); + emit(SHR(tmp_dst, src0, brw_imm_ud(16u))); dst.writemask = WRITEMASK_XY; emit(F16TO32(dst, tmp_src)); @@ -484,7 +484,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. 
*/ dst_reg shift(this, glsl_type::uvec4_type); - emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); + emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78))); dst_reg shifted(this, glsl_type::uvec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; @@ -494,7 +494,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0) dst_reg f(this, glsl_type::vec4_type); emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted)); - emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f))); + emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f))); } void @@ -506,7 +506,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0) * vector float and a type-converting MOV. */ dst_reg shift(this, glsl_type::uvec4_type); - emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); + emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78))); dst_reg shifted(this, glsl_type::uvec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; @@ -517,11 +517,11 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0) emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted)); dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f))); + emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f))); dst_reg max(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f)); - emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f)); + emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f)); } void @@ -532,7 +532,7 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0) inst->saturate = true; dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(saturated), src_reg(255.0f))); + emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f))); dst_reg rounded(this, glsl_type::vec4_type); emit(RNDE(rounded, src_reg(scaled))); @@ -548,13 +548,13 @@ void vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg 
&src0) { dst_reg max(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f)); dst_reg min(this, glsl_type::vec4_type); - emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f)); + emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f)); dst_reg scaled(this, glsl_type::vec4_type); - emit(MUL(scaled, src_reg(min), src_reg(127.0f))); + emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f))); dst_reg rounded(this, glsl_type::vec4_type); emit(RNDE(rounded, src_reg(scaled))); @@ -716,7 +716,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst, x_times_one_minus_a.writemask = dst.writemask; emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); + emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f))); emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } @@ -850,7 +850,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, coordinate)); emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask), - src_reg(0))); + brw_imm_d(0))); emit(inst); return src_reg(inst->dst); @@ -892,7 +892,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, */ if (op == ir_tex || op == ir_query_levels) { assert(lod.file == BAD_FILE); - lod = src_reg(0.0f); + lod = brw_imm_f(0.0f); } enum opcode opcode; @@ -912,12 +912,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op, unreachable("TXB is not valid for vertex shaders."); case ir_lod: unreachable("LOD is not valid for vertex shaders."); + case ir_samples_identical: { + /* There are some challenges implementing this for vec4, and it seems + * unlikely to be used anyway. For now, just always return false.
+ */ + emit(MOV(dest, brw_imm_ud(0u))); + return; + } default: unreachable("Unrecognized tex op"); } - vec4_instruction *inst = new(mem_ctx) vec4_instruction( - opcode, dst_reg(this, dest_type)); + vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest); inst->offset = constant_offset; @@ -963,7 +969,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op, if (zero_mask != 0) { emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask), - src_reg(0))); + brw_imm_d(0))); } /* Load the shadow comparitor */ if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) { @@ -1062,15 +1068,20 @@ vec4_visitor::emit_texture(ir_texture_opcode op, if (op == ir_txs && is_cube_array) { emit_math(SHADER_OPCODE_INT_QUOTIENT, writemask(inst->dst, WRITEMASK_Z), - src_reg(inst->dst), src_reg(6)); + src_reg(inst->dst), brw_imm_d(6)); } if (devinfo->gen == 6 && op == ir_tg4) { emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } - swizzle_result(op, dest, - src_reg(inst->dst), sampler, dest_type); + if (op == ir_query_levels) { + /* # levels is in .w */ + src_reg swizzled(dest); + swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, + SWIZZLE_W, SWIZZLE_W); + emit(MOV(dest, swizzled)); + } } /** @@ -1087,7 +1098,7 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) dst_f.type = BRW_REGISTER_TYPE_F; /* Convert from UNORM to UINT */ - emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1)))); + emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1)))); emit(MOV(dst, src_reg(dst_f))); if (wa & WA_SIGN) { @@ -1095,90 +1106,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) * shifting the sign bit into place, then shifting back * preserving sign. */ - emit(SHL(dst, src_reg(dst), src_reg(32 - width))); - emit(ASR(dst, src_reg(dst), src_reg(32 - width))); - } -} - -/** - * Set up the gather channel based on the swizzle, for gather4. 
- */ -uint32_t -vec4_visitor::gather_channel(unsigned gather_component, - uint32_t surface, uint32_t sampler) -{ - int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component); - switch (swiz) { - case SWIZZLE_X: return 0; - case SWIZZLE_Y: - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - if (key_tex->gather_channel_quirk_mask & (1 << surface)) - return 2; - return 1; - case SWIZZLE_Z: return 2; - case SWIZZLE_W: return 3; - default: - unreachable("Not reached"); /* zero, one swizzles handled already */ - } -} - -void -vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest, - src_reg orig_val, uint32_t sampler, - const glsl_type *dest_type) -{ - int s = key_tex->swizzles[sampler]; - - dst_reg swizzled_result = dest; - - if (op == ir_query_levels) { - /* # levels is in .w */ - orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - emit(MOV(swizzled_result, orig_val)); - return; - } - - if (op == ir_txs || dest_type == glsl_type::float_type - || s == SWIZZLE_NOOP || op == ir_tg4) { - emit(MOV(swizzled_result, orig_val)); - return; - } - - - int zero_mask = 0, one_mask = 0, copy_mask = 0; - int swizzle[4] = {0}; - - for (int i = 0; i < 4; i++) { - switch (GET_SWZ(s, i)) { - case SWIZZLE_ZERO: - zero_mask |= (1 << i); - break; - case SWIZZLE_ONE: - one_mask |= (1 << i); - break; - default: - copy_mask |= (1 << i); - swizzle[i] = GET_SWZ(s, i); - break; - } - } - - if (copy_mask) { - orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - swizzled_result.writemask = copy_mask; - emit(MOV(swizzled_result, orig_val)); - } - - if (zero_mask) { - swizzled_result.writemask = zero_mask; - emit(MOV(swizzled_result, src_reg(0.0f))); - } - - if (one_mask) { - swizzled_result.writemask = one_mask; - emit(MOV(swizzled_result, src_reg(1.0f))); + emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width))); + emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width))); } } @@ -1225,7 
+1154,7 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, */ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - src_reg(surf_index), src_reg(atomic_op)); + brw_imm_ud(surf_index), brw_imm_ud(atomic_op)); inst->mlen = mlen; } @@ -1245,7 +1174,7 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst, */ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_reg(offset), - src_reg(surf_index), src_reg(1)); + brw_imm_ud(surf_index), brw_imm_d(1)); inst->mlen = 1; } @@ -1286,14 +1215,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg header1_w = header1; header1_w.writemask = WRITEMASK_W; - emit(MOV(header1, 0u)); + emit(MOV(header1, brw_imm_ud(0u))); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]); current_annotation = "Point size"; - emit(MUL(header1_w, psiz, src_reg((float)(1 << 11)))); - emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8)); + emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11)))); + emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8))); } if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) { @@ -1301,13 +1230,13 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg flags0 = dst_reg(this, glsl_type::uint_type); dst_reg flags1 = dst_reg(this, glsl_type::uint_type); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0)); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L)); - emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0)); - emit(SHL(flags1, src_reg(flags1), 
src_reg(4))); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); + emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); emit(OR(header1_w, src_reg(header1_w), src_reg(flags1))); } @@ -1324,20 +1253,20 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) { src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]); ndc_w.swizzle = BRW_SWIZZLE_WWWW; - emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L)); + emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L)); vec4_instruction *inst; - inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6))); + inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6))); inst->predicate = BRW_PREDICATE_NORMAL; output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F; - inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f))); + inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f))); inst->predicate = BRW_PREDICATE_NORMAL; } emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1))); } else if (devinfo->gen < 6) { - emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u)); + emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u))); } else { - emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0))); + emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0))); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; @@ -1529,13 +1458,13 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, src_reg index = src_reg(this, glsl_type::int_type); emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); + brw_imm_d(reg_offset))); emit_before(block, inst, MUL(dst_reg(index), index, - src_reg(message_header_scale))); + brw_imm_d(message_header_scale))); return index; } else { - return src_reg(reg_offset * 
message_header_scale); + return brw_imm_d(reg_offset * message_header_scale); } } @@ -1547,24 +1476,24 @@ vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst, src_reg index = src_reg(this, glsl_type::int_type); emit_before(block, inst, ADD(dst_reg(index), *reladdr, - src_reg(reg_offset))); + brw_imm_d(reg_offset))); /* Pre-gen6, the message header uses byte offsets instead of vec4 * (16-byte) offset units. */ if (devinfo->gen < 6) { - emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16))); + emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16))); } return index; } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset))); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset))); return offset; } else { int message_header_scale = devinfo->gen < 6 ? 16 : 1; - return src_reg(reg_offset * message_header_scale); + return brw_imm_d(reg_offset * message_header_scale); } } @@ -1753,7 +1682,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, reg_offset); emit_pull_constant_load_reg(temp, - src_reg(index), + brw_imm_ud(index), offset, block, inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 5dd4f98cecc..fd8be7d972c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -50,7 +50,7 @@ vec4_vs_visitor::emit_prolog() dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; - emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); + emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f))); } /* Do sign recovery for 2101010 formats if required. 
*/ @@ -58,8 +58,8 @@ vec4_vs_visitor::emit_prolog() if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); - emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); @@ -87,16 +87,16 @@ vec4_vs_visitor::emit_prolog() /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<9) - 1)))); + brw_imm_f(1.0f / ((1<<9) - 1)))); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<1) - 1)))); + brw_imm_f(1.0f / ((1<<1) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg(reg_d))); emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); - emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f)); + emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * @@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog() /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), - src_reg(1.0f / ((1<<10) - 1)))); + brw_imm_f(1.0f / ((1<<10) - 1)))); emit(MOV(writemask(normalize_factor, WRITEMASK_W), - src_reg(1.0f / ((1<<2) - 1)))); + brw_imm_f(1.0f / ((1<<2) - 1)))); } dst_reg dst = reg; @@ -124,8 +124,8 @@ vec4_vs_visitor::emit_prolog() /* For signed normalization, we want the numerator to be 2c+1. 
*/ if (wa_flags & BRW_ATTRIB_WA_SIGN) { - emit(MUL(dst, src_reg(dst), src_reg(2.0f))); - emit(ADD(dst, src_reg(dst), src_reg(1.0f))); + emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f))); + emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f))); } emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 967448e0e41..7c783f66864 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -48,6 +48,7 @@ brw_codegen_vs_prog(struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key) { + const struct brw_compiler *compiler = brw->intelScreen->compiler; GLuint program_size; const GLuint *program; struct brw_vs_prog_data prog_data; @@ -79,7 +80,7 @@ brw_codegen_vs_prog(struct brw_context *brw, * by the state cache. */ int param_count = vp->program.Base.nir->num_uniforms; - if (!brw->intelScreen->compiler->scalar_vs) + if (!compiler->scalar_stage[MESA_SHADER_VERTEX]) param_count *= 4; if (vs) @@ -102,7 +103,7 @@ brw_codegen_vs_prog(struct brw_context *brw, if (prog) { brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base, &prog_data.base.base, - brw->intelScreen->compiler->scalar_vs); + compiler->scalar_stage[MESA_SHADER_VERTEX]); } else { brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base, &prog_data.base.base); @@ -173,7 +174,7 @@ brw_codegen_vs_prog(struct brw_context *brw, /* Emit GEN4 code. 
*/ char *error_str; - program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key, + program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, vp->program.Base.nir, brw_select_clip_planes(&brw->ctx), !_mesa_is_gles3(&brw->ctx), diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 2fef188c17e..3840ce0fe57 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -65,7 +65,7 @@ gen6_gs_visitor::emit_prolog() (prog_data->vue_map.num_slots + 1) * nir->info.gs.vertices_out); this->vertex_output_offset = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u))); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES), * so initialize it once to R0. @@ -87,13 +87,13 @@ gen6_gs_visitor::emit_prolog() * headers. */ this->first_vertex = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START)); + emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START))); /* The FF_SYNC message requires to know the number of primitives generated, * so keep a counter for this. */ this->prim_count = src_reg(this, glsl_type::uint_type); - emit(MOV(dst_reg(this->prim_count), 0u)); + emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u))); if (gs_prog_data->gen6_xfb_enabled) { /* Create a virtual register to hold destination indices in SOL */ @@ -170,7 +170,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) } emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); } /* Now buffer flags for this vertex */ @@ -181,9 +181,9 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) /* If we are outputting points, then every vertex has PrimStart and * PrimEnd set. 
*/ - emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | - URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)); - emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | + URB_WRITE_PRIM_START | URB_WRITE_PRIM_END))); + emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u))); } else { /* Otherwise, we can only set the PrimStart flag, which we have stored * in the first_vertex register. We will have to wait until we execute @@ -191,11 +191,12 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) * vertex. */ emit(OR(dst, this->first_vertex, - (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); - emit(MOV(dst_reg(this->first_vertex), 0u)); + brw_imm_ud(gs_prog_data->output_topology << + URB_WRITE_PRIM_TYPE_SHIFT))); + emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u))); } emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); } void @@ -218,10 +219,10 @@ gen6_gs_visitor::gs_end_primitive() * below). */ unsigned num_output_vertices = nir->info.gs.vertices_out; - emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1), - BRW_CONDITIONAL_L)); - vec4_instruction *inst = emit(CMP(dst_null_d(), - this->vertex_count, 0u, + emit(CMP(dst_null_ud(), this->vertex_count, + brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L)); + vec4_instruction *inst = emit(CMP(dst_null_ud(), + this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); inst->predicate = BRW_PREDICATE_NORMAL; emit(IF(BRW_PREDICATE_NORMAL)); @@ -231,19 +232,19 @@ gen6_gs_visitor::gs_end_primitive() * vertex. 
*/ src_reg offset(this, glsl_type::uint_type); - emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1))); + emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1))); src_reg dst(this->vertex_output); dst.reladdr = ralloc(mem_ctx, src_reg); memcpy(dst.reladdr, &offset, sizeof(src_reg)); - emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END)); - emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u)); + emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END))); + emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u))); /* Set the first vertex flag to indicate that the next vertex will start * a primitive. */ - emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START)); + emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START))); } emit(BRW_OPCODE_ENDIF); } @@ -262,7 +263,8 @@ gen6_gs_visitor::emit_urb_write_header(int mrf) */ src_reg flags_offset(this, glsl_type::uint_type); emit(ADD(dst_reg(flags_offset), - this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots))); + this->vertex_output_offset, + brw_imm_d(prog_data->vue_map.num_slots))); src_reg flags_data(this->vertex_output); flags_data.reladdr = ralloc(mem_ctx, src_reg); @@ -321,7 +323,7 @@ gen6_gs_visitor::emit_thread_end() * points because in the point case we set PrimEnd on all vertices. */ if (nir->info.gs.output_primitive != GL_POINTS) { - emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z)); + emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z)); emit(IF(BRW_PREDICATE_NORMAL)); gs_end_primitive(); emit(BRW_OPCODE_ENDIF); @@ -347,7 +349,7 @@ gen6_gs_visitor::emit_thread_end() int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen); /* Issue the FF_SYNC message and obtain the initial VUE handle. 
*/ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G)); + emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G)); emit(IF(BRW_PREDICATE_NORMAL)); { this->current_annotation = "gen6 thread end: ff_sync"; @@ -364,15 +366,15 @@ gen6_gs_visitor::emit_thread_end() dst_reg(this->temp), this->prim_count, this->svbi); } else { inst = emit(GS_OPCODE_FF_SYNC, - dst_reg(this->temp), this->prim_count, src_reg(0u)); + dst_reg(this->temp), this->prim_count, brw_imm_ud(0u)); } inst->base_mrf = base_mrf; /* Loop over all buffered vertices and emit URB write messages */ this->current_annotation = "gen6 thread end: urb writes init"; src_reg vertex(this, glsl_type::uint_type); - emit(MOV(dst_reg(vertex), 0u)); - emit(MOV(dst_reg(this->vertex_output_offset), 0u)); + emit(MOV(dst_reg(vertex), brw_imm_ud(0u))); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); this->current_annotation = "gen6 thread end: urb writes"; emit(BRW_OPCODE_DO); @@ -416,7 +418,7 @@ gen6_gs_visitor::emit_thread_end() mrf++; emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); /* If this was max_usable_mrf, we can't fit anything more into * this URB WRITE. Same if we reached the max. message length. @@ -437,9 +439,9 @@ gen6_gs_visitor::emit_thread_end() * writing the next vertex. */ emit(ADD(dst_reg(this->vertex_output_offset), - this->vertex_output_offset, 1u)); + this->vertex_output_offset, brw_imm_ud(1u))); - emit(ADD(dst_reg(vertex), vertex, 1u)); + emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u))); } emit(BRW_OPCODE_WHILE); @@ -468,8 +470,8 @@ gen6_gs_visitor::emit_thread_end() if (gs_prog_data->gen6_xfb_enabled) { /* When emitting EOT, set SONumPrimsWritten Increment Value. 
*/ src_reg data(this, glsl_type::uint_type); - emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu))); - emit(SHL(dst_reg(data), data, src_reg(16u))); + emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu))); + emit(SHL(dst_reg(data), data, brw_imm_ud(16u))); emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data); } @@ -588,8 +590,8 @@ gen6_gs_visitor::xfb_write() this->current_annotation = "gen6 thread end: svb writes init"; - emit(MOV(dst_reg(this->vertex_output_offset), 0u)); - emit(MOV(dst_reg(this->sol_prim_written), 0u)); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u))); + emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u))); /* Check that at least one primitive can be written * @@ -600,7 +602,7 @@ gen6_gs_visitor::xfb_write() * transform feedback is in interleaved or separate attribs mode. */ src_reg sol_temp(this, glsl_type::uvec4_type); - emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts))); + emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts))); /* Compare SVBI calculated number with the maximum value, which is * in R1.4 (previously saved in this->max_svbi) for gen6. @@ -623,7 +625,7 @@ gen6_gs_visitor::xfb_write() /* Write transform feedback data for all processed vertices. */ for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) { - emit(MOV(dst_reg(sol_temp), i)); + emit(MOV(dst_reg(sol_temp), brw_imm_d(i))); emit(CMP(dst_null_d(), sol_temp, this->vertex_count, BRW_CONDITIONAL_L)); emit(IF(BRW_PREDICATE_NORMAL)); @@ -644,8 +646,8 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) /* Check for buffer overflow: we need room to write the complete primitive * (all vertices). 
Otherwise, avoid writing any vertices for it */ - emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u)); - emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts))); + emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u))); + emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts))); emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi)); emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE)); emit(IF(BRW_PREDICATE_NORMAL)); @@ -683,7 +685,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) src_reg data(this->vertex_output); data.reladdr = ralloc(mem_ctx, src_reg); int offset = get_vertex_output_offset_for_varying(vertex, varying); - emit(MOV(dst_reg(this->vertex_output_offset), offset)); + emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset))); memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg)); data.type = output_reg[varying].type; @@ -710,9 +712,9 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) */ emit(ADD(dst_reg(this->destination_indices), this->destination_indices, - src_reg(num_verts))); + brw_imm_ud(num_verts))); emit(ADD(dst_reg(this->sol_prim_written), - this->sol_prim_written, 1u)); + this->sol_prim_written, brw_imm_ud(1u))); } } diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 9f4a5db3592..d508c4c9278 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ - 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */ - 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */ + HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ + DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ GS_PRIMITIVES_COUNT, 
/* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ @@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx, case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: query->Base.Result = results[1] - results[0]; break; @@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, query->Base.Result /= 4; break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_queryobj_get_results()"); } @@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_begin_query()"); } @@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_GEOMETRY_SHADER_INVOCATIONS: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 1); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_end_query()"); } diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 69162171c4e..161de77e156 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -118,7 +118,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): * - * A PIPE_CONTOL command with the CS Stall bit set must be programmed + * A PIPE_CONTROL command with the CS Stall bit set must be programmed * in the ring after this instruction. * * No such restriction exists for Haswell or Baytrail. diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 140a6544983..9cdd1c71b4d 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -187,7 +187,13 @@ gen8_emit_fast_clear_color(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t *surf) { - surf[7] |= mt->fast_clear_color_value; + if (brw->gen >= 9) { + surf[12] = mt->gen9_fast_clear_color.ui[0]; + surf[13] = mt->gen9_fast_clear_color.ui[1]; + surf[14] = mt->gen9_fast_clear_color.ui[2]; + surf[15] = mt->gen9_fast_clear_color.ui[3]; + } else + surf[7] |= mt->fast_clear_color_value; } static void @@ -208,6 +214,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); + const uint32_t surf_type = translate_tex_target(target); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -231,9 +238,14 @@ gen8_emit_texture_surface_state(struct brw_context *brw, */ if (brw->gen >= 9 || mt->num_samples == 1) assert(mt->halign == 16); + + if (brw->gen >= 9) { + assert(mt->num_samples > 1 || + brw_losslessly_compressible_format(brw, surf_type)); + } + } - const uint32_t surf_type = translate_tex_target(target); uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) | 
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index c00d2e786f3..f53c4ab518a 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -75,6 +75,10 @@ static const struct debug_control debug_control[] = { { "cs", DEBUG_CS }, { "hex", DEBUG_HEX }, { "nocompact", DEBUG_NO_COMPACTION }, + { "hs", DEBUG_TCS }, + { "tcs", DEBUG_TCS }, + { "ds", DEBUG_TES }, + { "tes", DEBUG_TES }, { NULL, 0 } }; @@ -83,8 +87,8 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) { uint64_t flags[] = { [MESA_SHADER_VERTEX] = DEBUG_VS, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_TESS_CTRL] = DEBUG_TCS, + [MESA_SHADER_TESS_EVAL] = DEBUG_TES, [MESA_SHADER_GEOMETRY] = DEBUG_GS, [MESA_SHADER_FRAGMENT] = DEBUG_WM, [MESA_SHADER_COMPUTE] = DEBUG_CS, diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 98bd7e93956..9c6030a6d7d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -69,6 +69,8 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_CS (1ull << 33) #define DEBUG_HEX (1ull << 34) #define DEBUG_NO_COMPACTION (1ull << 35) +#define DEBUG_TCS (1ull << 36) +#define DEBUG_TES (1ull << 37) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 386b63c123d..2e2459c125b 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; + ctx->Extensions.EXT_shader_samples_identical = true; if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_draw_indirect = true; diff --git 
a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b1a7632d82f..87e01366932 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -35,6 +35,7 @@ #include "brw_blorp.h" #include "brw_context.h" +#include "brw_state.h" #include "main/enums.h" #include "main/fbobject.h" @@ -192,6 +193,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, * 64bpp, and 128bpp. + * + * From the Skylake documentation, it is made clear that X-tiling is no longer + * supported: + * + * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf + * non-MSRTs only. */ static bool intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, @@ -201,14 +208,6 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (brw->gen < 7) return false; - if (brw->gen >= 9) { - /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU - * FINISHME: hangs are resolved. - */ - perf_debug("singlesample fast MCS clears disabled on gen9"); - return false; - } - if (mt->disable_aux_buffers) return false; @@ -259,7 +258,11 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (!brw->format_supported_as_render_target[mt->format]) return false; - return true; + if (brw->gen >= 9) { + const uint32_t brw_format = brw_format_for_mesa_format(mt->format); + return brw_losslessly_compressible_format(brw, brw_format); + } else + return true; } @@ -1495,6 +1498,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height); unsigned width_divisor = block_width_px * 4; unsigned height_divisor = block_height * 8; + + /* The Skylake MCS is twice as tall as the Broadwell MCS. + * + * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines + * in the main surface. 
In Skylake, it's two bits. The extra bit + * doubles the MCS height, not width, because in Skylake the MCS is always + * Y-tiled. + */ + if (brw->gen >= 9) + height_divisor /= 2; + unsigned mcs_width = ALIGN(mt->logical_width0, width_divisor) / width_divisor; unsigned mcs_height = diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 805cd714d88..64f73ea9ae5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -633,15 +633,22 @@ struct intel_mipmap_tree * The SURFACE_STATE bits associated with the last fast color clear to this * color mipmap tree, if any. * - * This value will only ever contain ones in bits 28-31, so it is safe to - * OR into dword 7 of SURFACE_STATE. + * Prior to GEN9 there is a single bit for RGBA clear values which gives you + * the option of 2^4 clear colors. Each bit determines if the color channel + * is fully saturated or unsaturated (Cherryview does add a 32b value per + * channel, but it is globally applied instead of being part of the render + * surface state). Starting with GEN9, the surface state accepts a 32b value + * for each color channel. 
* * @see RENDER_SURFACE_STATE.RedClearColor * @see RENDER_SURFACE_STATE.GreenClearColor * @see RENDER_SURFACE_STATE.BlueClearColor * @see RENDER_SURFACE_STATE.AlphaClearColor */ - uint32_t fast_clear_color_value; + union { + uint32_t fast_clear_color_value; + union gl_color_union gen9_fast_clear_color; + }; /** * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 62d39f70ec4..034d8a507fe 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -107,7 +107,7 @@ TEST_F(cmod_propagation_test, basic) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -139,7 +139,7 @@ TEST_F(cmod_propagation_test, cmp_nonzero) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg nonzero(1.0f); + fs_reg nonzero(brw_imm_f(1.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE); @@ -171,7 +171,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::uint_type); fs_reg src0 = v->vgrf(glsl_type::uint_type); - fs_reg zero(0u); + fs_reg zero(brw_imm_ud(0u)); bld.FBL(dest, src0); bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE); @@ -205,7 +205,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), 
src2, zero, BRW_CONDITIONAL_GE); bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -244,7 +244,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -282,7 +282,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(offset(dest, bld, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; @@ -323,7 +323,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); @@ -360,7 +360,7 @@ TEST_F(cmod_propagation_test, negate) fs_reg dest = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); dest.negate = true; bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); @@ -425,7 +425,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::int_type); fs_reg src1 = v->vgrf(glsl_type::int_type); - fs_reg zero(0.0f); + fs_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); 
bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero, BRW_CONDITIONAL_GE); @@ -458,8 +458,8 @@ TEST_F(cmod_propagation_test, andnz_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg one(1); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -493,8 +493,8 @@ TEST_F(cmod_propagation_test, andnz_non_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg nonone(38); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg nonone(brw_imm_d(38)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -528,8 +528,8 @@ TEST_F(cmod_propagation_test, andz_one) const fs_builder &bld = v->bld; fs_reg dest = v->vgrf(glsl_type::int_type); fs_reg src0 = v->vgrf(glsl_type::float_type); - fs_reg zero(0.0f); - fs_reg one(1); + fs_reg zero(brw_imm_f(0.0f)); + fs_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_Z, diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp index 9aa2fcc7907..e5e566c60bc 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp @@ -145,7 +145,7 @@ TEST_F(cmod_propagation_test, basic) dst_reg dest = dst_reg(v, glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); dest_null.writemask = WRITEMASK_X; @@ -181,7 +181,7 @@ TEST_F(cmod_propagation_test, basic_different_dst_writemask) dst_reg dest = dst_reg(v, 
glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); bld.ADD(dest, src0, src1); @@ -217,8 +217,8 @@ TEST_F(cmod_propagation_test, andz_one) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); - src_reg one(1); + src_reg zero(brw_imm_f(0.0f)); + src_reg one(brw_imm_d(1)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_Z, @@ -253,7 +253,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::uint_type); src_reg src0 = src_reg(v, glsl_type::uint_type); - src_reg zero(0u); + src_reg zero(brw_imm_ud(0u)); bld.FBL(dest, src0); bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE); @@ -288,7 +288,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE); @@ -328,7 +328,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest0, src0, src1); set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE); @@ -367,7 +367,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write) src_reg src0 = src_reg(v, 
glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::vec2_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(offset(dest, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; @@ -409,7 +409,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value) src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); src_reg src2 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); dest_null.writemask = WRITEMASK_X; @@ -449,7 +449,7 @@ TEST_F(cmod_propagation_test, negate) dst_reg dest = dst_reg(v, glsl_type::float_type); src_reg src0 = src_reg(v, glsl_type::float_type); src_reg src1 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); src_reg tmp_src = src_reg(dest); tmp_src.negate = true; @@ -521,7 +521,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero) dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::int_type); src_reg src1 = src_reg(v, glsl_type::int_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.ADD(dest, src0, src1); bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero, BRW_CONDITIONAL_GE); @@ -555,8 +555,8 @@ TEST_F(cmod_propagation_test, andnz_non_one) const vec4_builder bld = vec4_builder(v).at_end(); dst_reg dest = dst_reg(v, glsl_type::int_type); src_reg src0 = src_reg(v, glsl_type::float_type); - src_reg zero(0.0f); - src_reg nonone(38); + src_reg zero(brw_imm_f(0.0f)); + src_reg nonone(brw_imm_d(38)); bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); set_condmod(BRW_CONDITIONAL_NZ, @@ -594,7 +594,7 @@ TEST_F(cmod_propagation_test, basic_vec4) dst_reg dest = dst_reg(v, glsl_type::vec4_type); src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = 
src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); bld.MUL(dest, src0, src1); bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ); @@ -628,7 +628,7 @@ TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask) dest.writemask = WRITEMASK_X; src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); dst_reg dest_null = bld.null_reg_f(); bld.MUL(dest, src0, src1); @@ -668,7 +668,7 @@ TEST_F(cmod_propagation_test, mad_one_component_vec4) src_reg src2 = src_reg(v, glsl_type::vec4_type); src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; src2.negate = true; - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg tmp(dest); tmp.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_f(); @@ -710,7 +710,7 @@ TEST_F(cmod_propagation_test, mad_more_one_component_vec4) src_reg src2 = src_reg(v, glsl_type::vec4_type); src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; src2.negate = true; - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg tmp(dest); tmp.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_f(); @@ -751,7 +751,7 @@ TEST_F(cmod_propagation_test, cmp_mov_vec4) src_reg src0 = src_reg(v, glsl_type::ivec4_type); src0.swizzle = BRW_SWIZZLE_XXXX; src0.file = UNIFORM; - src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D); + src_reg nonone = retype(brw_imm_d(16), BRW_REGISTER_TYPE_D); src_reg mov_src = src_reg(dest); mov_src.swizzle = BRW_SWIZZLE_XXXX; dst_reg dest_null = bld.null_reg_d(); @@ -790,7 +790,7 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4) dst_reg dest = dst_reg(v, glsl_type::vec4_type); src_reg src0 = src_reg(v, glsl_type::vec4_type); src_reg src1 = src_reg(v, glsl_type::vec4_type); - src_reg zero(0.0f); + src_reg zero(brw_imm_f(0.0f)); src_reg cmp_src = src_reg(dest); cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2); diff --git 
a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index a1f91d9c56a..ede409b6919 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -162,7 +162,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask) SWIZZLE_X, SWIZZLE_Z)))); - v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), src_reg(1.0f))); + v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f))); vec4_instruction *test_mov = v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(SWIZZLE_W, diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index d84e2e98ec0..90a6bc3618f 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -135,7 +135,7 @@ TEST_F(register_coalesce_test, test_compute_to_mrf) m0.writemask = WRITEMASK_X; m0.type = BRW_REGISTER_TYPE_F; - vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f))); + vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f))); v->emit(v->MOV(m0, src_reg(temp))); register_coalesce(v); @@ -159,7 +159,7 @@ TEST_F(register_coalesce_test, test_multiple_use) m1.type = BRW_REGISTER_TYPE_F; src_reg src = src_reg(temp); - vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f))); + vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f))); src.swizzle = BRW_SWIZZLE_XXXX; v->emit(v->MOV(m0, src)); src.swizzle = BRW_SWIZZLE_XYZW; diff --git a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp index 6a8bceabf16..7af97d0d097 100644 --- a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp +++ b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp @@ -40,15 +40,10 @@ void vf_float_conversion_test::SetUp() { int ebits = (vf >> 4) & 0x7; int mbits = vf & 
0xf; - int e = ebits - 3; + float x = 1.0f + mbits / 16.0f; + int exp = ebits - 3; - float value = 1.0f; - - value += mbits / 16.0f; - - value *= exp2f(e); - - vf_to_float[vf] = value; + vf_to_float[vf] = ldexpf(x, exp); } } @@ -98,3 +93,18 @@ TEST_F(vf_float_conversion_test, test_special_case_0) EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(+0.0f))), f2u(+0.0f)); EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(-0.0f))), f2u(-0.0f)); } + +TEST_F(vf_float_conversion_test, test_nonrepresentable_float_input) +{ + EXPECT_EQ(brw_float_to_vf(+32.0f), -1); + EXPECT_EQ(brw_float_to_vf(-32.0f), -1); + + EXPECT_EQ(brw_float_to_vf(+16.5f), -1); + EXPECT_EQ(brw_float_to_vf(-16.5f), -1); + + EXPECT_EQ(brw_float_to_vf(+8.25f), -1); + EXPECT_EQ(brw_float_to_vf(-8.25f), -1); + + EXPECT_EQ(brw_float_to_vf(+4.125f), -1); + EXPECT_EQ(brw_float_to_vf(-4.125f), -1); +} diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index a8c31b741ed..14f5e71fadf 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -63,7 +63,9 @@ static const struct tx_table tx_table_be[] = [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, @@ -91,7 +93,9 @@ static const struct tx_table tx_table_le[] = [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_R8G8B8A8_UNORM ] = { 
R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index f8ec432755a..37c2fa0dc2f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -63,6 +63,8 @@ static const struct tx_table tx_table[] = [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8X8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 }, + [ MESA_FORMAT_X8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index ddf7f497f1e..2ae22e9e691 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -67,7 +67,7 @@ legal_src_factor(const struct gl_context *ctx, GLenum factor) case GL_SRC1_ALPHA: case GL_ONE_MINUS_SRC1_COLOR: case GL_ONE_MINUS_SRC1_ALPHA: - return _mesa_is_desktop_gl(ctx) + return ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended; default: return GL_FALSE; @@ -100,14 +100,14 @@ legal_dst_factor(const struct gl_context *ctx, GLenum factor) case 
GL_ONE_MINUS_CONSTANT_ALPHA: return _mesa_is_desktop_gl(ctx) || ctx->API == API_OPENGLES2; case GL_SRC_ALPHA_SATURATE: - return (_mesa_is_desktop_gl(ctx) + return (ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended) || _mesa_is_gles3(ctx); case GL_SRC1_COLOR: case GL_SRC1_ALPHA: case GL_ONE_MINUS_SRC1_COLOR: case GL_ONE_MINUS_SRC1_ALPHA: - return _mesa_is_desktop_gl(ctx) + return ctx->API != API_OPENGLES && ctx->Extensions.ARB_blend_func_extended; default: return GL_FALSE; @@ -404,7 +404,7 @@ _mesa_BlendEquation( GLenum mode ) ctx->Color._BlendEquationPerBuffer = GL_FALSE; if (ctx->Driver.BlendEquationSeparate) - (*ctx->Driver.BlendEquationSeparate)( ctx, mode, mode ); + ctx->Driver.BlendEquationSeparate(ctx, mode, mode); } @@ -582,7 +582,7 @@ _mesa_BlendColor( GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha ) ctx->Color.BlendColor[3] = CLAMP(tmp[3], 0.0F, 1.0F); if (ctx->Driver.BlendColor) - (*ctx->Driver.BlendColor)(ctx, ctx->Color.BlendColor); + ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor); } diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 93588a2ee18..83e238ae825 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -731,7 +731,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, /* Call the device driver function only if fb is the bound read buffer */ if (fb == ctx->ReadBuffer) { if (ctx->Driver.ReadBuffer) - (*ctx->Driver.ReadBuffer)(ctx, buffer); + ctx->Driver.ReadBuffer(ctx, buffer); } } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 42f67990784..a8a667e3c12 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -369,10 +369,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; case GL_DEBUG_OUTPUT: case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; - else - _mesa_set_debug_state_int(ctx, cap, state); + _mesa_set_debug_state_int(ctx, cap, state); break; case 
GL_DITHER: if (ctx->Color.DitherFlag == state) @@ -1225,10 +1222,7 @@ _mesa_IsEnabled( GLenum cap ) return ctx->Polygon.CullFlag; case GL_DEBUG_OUTPUT: case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; - else - return (GLboolean) _mesa_get_debug_state_int(ctx, cap); + return (GLboolean) _mesa_get_debug_state_int(ctx, cap); case GL_DEPTH_TEST: return ctx->Depth.Test; case GL_DITHER: diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index f720de316e4..366b119aba3 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -978,9 +978,13 @@ _mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id, GLenum severity, GLint length, const GLchar *buf) { - const char *callerstr = "glDebugMessageInsert"; - GET_CURRENT_CONTEXT(ctx); + const char *callerstr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glDebugMessageInsert"; + else + callerstr = "glDebugMessageInsertKHR"; if (!validate_params(ctx, INSERT, callerstr, source, type, severity)) return; /* GL_INVALID_ENUM */ @@ -1004,15 +1008,21 @@ _mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources, { GET_CURRENT_CONTEXT(ctx); struct gl_debug_state *debug; + const char *callerstr; GLuint ret; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetDebugMessageLog"; + else + callerstr = "glGetDebugMessageLogKHR"; + if (!messageLog) logSize = 0; if (logSize < 0) { _mesa_error(ctx, GL_INVALID_VALUE, - "glGetDebugMessageLog(logSize=%d : logSize must not be" - " negative)", logSize); + "%s(logSize=%d : logSize must not be negative)", + callerstr, logSize); return 0; } @@ -1066,9 +1076,14 @@ _mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type, enum mesa_debug_source source = gl_enum_to_debug_source(gl_source); enum mesa_debug_type type = gl_enum_to_debug_type(gl_type); enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity); - const char *callerstr = "glDebugMessageControl"; + const char *callerstr; struct 
gl_debug_state *debug; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glDebugMessageControl"; + else + callerstr = "glDebugMessageControlKHR"; + if (count < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(count=%d : count must not be negative)", callerstr, @@ -1124,10 +1139,15 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length, const GLchar *message) { GET_CURRENT_CONTEXT(ctx); - const char *callerstr = "glPushDebugGroup"; + const char *callerstr; struct gl_debug_state *debug; struct gl_debug_message *emptySlot; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glPushDebugGroup"; + else + callerstr = "glPushDebugGroupKHR"; + switch(source) { case GL_DEBUG_SOURCE_APPLICATION: case GL_DEBUG_SOURCE_THIRD_PARTY: @@ -1176,10 +1196,15 @@ void GLAPIENTRY _mesa_PopDebugGroup(void) { GET_CURRENT_CONTEXT(ctx); - const char *callerstr = "glPopDebugGroup"; + const char *callerstr; struct gl_debug_state *debug; struct gl_debug_message *gdmessage, msg; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glPopDebugGroup"; + else + callerstr = "glPopDebugGroupKHR"; + debug = _mesa_lock_debug_state(ctx); if (!debug) return; diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index e94d2b74749..fa50cb68cca 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -40,7 +40,6 @@ struct gl_extensions _mesa_extension_override_enables; struct gl_extensions _mesa_extension_override_disables; static char *extra_extensions = NULL; -static char *cant_disable_extensions = NULL; /** @@ -68,29 +67,30 @@ const struct mesa_extension _mesa_extension_table[] = { #undef EXT }; +static bool disabled_extensions[ARRAY_SIZE(_mesa_extension_table)]; /** * Given an extension name, lookup up the corresponding member of struct - * gl_extensions and return that member's offset (in bytes). If the name is - * not found in the \c _mesa_extension_table, return 0. + * gl_extensions and return that member's index. 
If the name is + * not found in the \c _mesa_extension_table, return -1. * * \param name Name of extension. - * \return Offset of member in struct gl_extensions. + * \return Index of member in struct gl_extensions. */ -static size_t -name_to_offset(const char* name) +static int +name_to_index(const char* name) { unsigned i; if (name == 0) - return 0; + return -1; for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) { if (strcmp(name, _mesa_extension_table[i].name) == 0) - return _mesa_extension_table[i].offset; + return i; } - return 0; + return -1; } /** @@ -206,11 +206,11 @@ _mesa_enable_sw_extensions(struct gl_context *ctx) * \return offset of extensions withint `ext' or 0 if extension is not known */ static size_t -set_extension(struct gl_extensions *ext, const char *name, GLboolean state) +set_extension(struct gl_extensions *ext, int i, GLboolean state) { size_t offset; - offset = name_to_offset(name); + offset = i < 0 ? 0 : _mesa_extension_table[i].offset; if (offset != 0 && (offset != o(dummy_true) || state != GL_FALSE)) { ((GLboolean *) ext)[offset] = state; } @@ -240,12 +240,6 @@ get_extension_override( struct gl_context *ctx ) { override_extensions_in_context(ctx); - if (cant_disable_extensions != NULL) { - _mesa_problem(ctx, - "Trying to disable permanently enabled extensions: %s", - cant_disable_extensions); - } - if (extra_extensions == NULL) { return calloc(1, sizeof(char)); } else { @@ -257,7 +251,7 @@ get_extension_override( struct gl_context *ctx ) /** - * \brief Free extra_extensions and cant_disable_extensions strings + * \brief Free extra_extensions string * * These strings are allocated early during the first context creation by * _mesa_one_time_init_extension_overrides. @@ -266,7 +260,6 @@ static void free_unknown_extensions_strings(void) { free(extra_extensions); - free(cant_disable_extensions); } @@ -295,21 +288,20 @@ _mesa_one_time_init_extension_overrides(void) /* extra_exts: List of unrecognized extensions. 
*/ extra_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char)); - cant_disable_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char)); /* Copy env_const because strtok() is destructive. */ env = strdup(env_const); - if (env == NULL || extra_extensions == NULL || - cant_disable_extensions == NULL) { - free(env); - free(extra_extensions); - free(cant_disable_extensions); - return; + if (env == NULL || + extra_extensions == NULL) { + free(env); + free(extra_extensions); + return; } for (ext = strtok(env, " "); ext != NULL; ext = strtok(NULL, " ")) { int enable; + int i; bool recognized; switch (ext[0]) { case '+': @@ -325,7 +317,8 @@ _mesa_one_time_init_extension_overrides(void) break; } - offset = set_extension(&_mesa_extension_override_enables, ext, enable); + i = name_to_index(ext); + offset = set_extension(&_mesa_extension_override_enables, i, enable); if (offset != 0 && (offset != o(dummy_true) || enable != GL_FALSE)) { ((GLboolean *) &_mesa_extension_override_disables)[offset] = !enable; recognized = true; @@ -333,14 +326,12 @@ _mesa_one_time_init_extension_overrides(void) recognized = false; } - if (!recognized) { - if (enable) { - strcat(extra_extensions, ext); - strcat(extra_extensions, " "); - } else if (offset == o(dummy_true)) { - strcat(cant_disable_extensions, ext); - strcat(cant_disable_extensions, " "); - } + if (i >= 0) + disabled_extensions[i] = !enable; + + if (!recognized && enable) { + strcat(extra_extensions, ext); + strcat(extra_extensions, " "); } } @@ -354,13 +345,6 @@ _mesa_one_time_init_extension_overrides(void) } else if (extra_extensions[len - 1] == ' ') { extra_extensions[len - 1] = '\0'; } - len = strlen(cant_disable_extensions); - if (len == 0) { - free(cant_disable_extensions); - cant_disable_extensions = NULL; - } else if (cant_disable_extensions[len - 1] == ' ') { - cant_disable_extensions[len - 1] = '\0'; - } } @@ -401,7 +385,8 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i) 
const bool *base = (bool *) &ctx->Extensions; const struct mesa_extension *ext = _mesa_extension_table + i; - return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset]; + return !disabled_extensions[i] && + (ctx->Version >= ext->version[ctx->API]) && base[ext->offset]; } /** diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index d12fd9f1c8d..051d69a3613 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -1,8 +1,31 @@ +/* The extension table is alphabetically sorted by the extension name string column. */ + #define GLL 0 #define GLC 0 #define ES1 0 #define ES2 0 #define x ~0 + +EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999) + +EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009) +EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009) +EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007) +EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013) +EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) +EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) +EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) +EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012) +EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012) + +EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) +EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) + +EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006) +EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002) +EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009) +EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002) + 
EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009) EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012) EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012) @@ -16,9 +39,9 @@ EXT(ARB_color_buffer_float , ARB_color_buffer_float EXT(ARB_compressed_texture_pixel_storage , dummy_true , GLL, GLC, x , x , 2011) EXT(ARB_compute_shader , ARB_compute_shader , GLL, GLC, x , x , 2012) EXT(ARB_conditional_render_inverted , ARB_conditional_render_inverted , GLL, GLC, x , x , 2014) +EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011) EXT(ARB_copy_buffer , dummy_true , GLL, GLC, x , x , 2008) EXT(ARB_copy_image , ARB_copy_image , GLL, GLC, x , x , 2012) -EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011) EXT(ARB_debug_output , dummy_true , GLL, GLC, x , x , 2009) EXT(ARB_depth_buffer_float , ARB_depth_buffer_float , GLL, GLC, x , x , 2008) EXT(ARB_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2003) @@ -56,8 +79,8 @@ EXT(ARB_multi_bind , dummy_true EXT(ARB_multi_draw_indirect , ARB_draw_indirect , x , GLC, x , x , 2012) EXT(ARB_multisample , dummy_true , GLL, x , x , x , 1994) EXT(ARB_multitexture , dummy_true , GLL, x , x , x , 1998) -EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003) EXT(ARB_occlusion_query , ARB_occlusion_query , GLL, x , x , x , 2001) +EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003) EXT(ARB_pipeline_statistics_query , ARB_pipeline_statistics_query , GLL, GLC, x , x , 2014) EXT(ARB_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004) EXT(ARB_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997) @@ -83,13 +106,13 @@ EXT(ARB_shader_subroutine , ARB_shader_subroutine EXT(ARB_shader_texture_image_samples , ARB_shader_texture_image_samples , GLL, GLC, x , x , 2014) EXT(ARB_shader_texture_lod , ARB_shader_texture_lod , GLL, GLC, x , x , 2009) 
EXT(ARB_shading_language_100 , dummy_true , GLL, x , x , x , 2003) -EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011) EXT(ARB_shading_language_420pack , ARB_shading_language_420pack , GLL, GLC, x , x , 2011) +EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011) EXT(ARB_shadow , ARB_shadow , GLL, x , x , x , 2001) EXT(ARB_stencil_texturing , ARB_stencil_texturing , GLL, GLC, x , x , 2012) EXT(ARB_sync , ARB_sync , GLL, GLC, x , x , 2003) -EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014) EXT(ARB_tessellation_shader , ARB_tessellation_shader , x , GLC, x , x , 2009) +EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014) EXT(ARB_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 2000) EXT(ARB_texture_buffer_object , ARB_texture_buffer_object , x , GLC, x , x , 2008) EXT(ARB_texture_buffer_object_rgb32 , ARB_texture_buffer_object_rgb32 , x , GLC, x , x , 2009) @@ -105,20 +128,20 @@ EXT(ARB_texture_env_crossbar , ARB_texture_env_crossbar EXT(ARB_texture_env_dot3 , ARB_texture_env_dot3 , GLL, x , x , x , 2001) EXT(ARB_texture_float , ARB_texture_float , GLL, GLC, x , x , 2004) EXT(ARB_texture_gather , ARB_texture_gather , GLL, GLC, x , x , 2009) -EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001) EXT(ARB_texture_mirror_clamp_to_edge , ARB_texture_mirror_clamp_to_edge , GLL, GLC, x , x , 2013) +EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001) EXT(ARB_texture_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2009) EXT(ARB_texture_non_power_of_two , ARB_texture_non_power_of_two , GLL, GLC, x , x , 2003) EXT(ARB_texture_query_levels , ARB_texture_query_levels , GLL, GLC, x , x , 2012) EXT(ARB_texture_query_lod , ARB_texture_query_lod , GLL, GLC, x , x , 2009) EXT(ARB_texture_rectangle , NV_texture_rectangle , GLL, GLC, x , x , 2004) -EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x 
, x , 2009) EXT(ARB_texture_rg , ARB_texture_rg , GLL, GLC, x , x , 2008) +EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009) EXT(ARB_texture_stencil8 , ARB_texture_stencil8 , GLL, GLC, x , x , 2013) EXT(ARB_texture_storage , dummy_true , GLL, GLC, x , x , 2011) EXT(ARB_texture_storage_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2012) -EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012) EXT(ARB_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) +EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012) EXT(ARB_timer_query , ARB_timer_query , GLL, GLC, x , x , 2010) EXT(ARB_transform_feedback2 , ARB_transform_feedback2 , GLL, GLC, x , x , 2010) EXT(ARB_transform_feedback3 , ARB_transform_feedback3 , GLL, GLC, x , x , 2010) @@ -127,28 +150,39 @@ EXT(ARB_transpose_matrix , dummy_true EXT(ARB_uniform_buffer_object , ARB_uniform_buffer_object , GLL, GLC, x , x , 2009) EXT(ARB_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) EXT(ARB_vertex_array_object , dummy_true , GLL, GLC, x , x , 2006) +EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010) EXT(ARB_vertex_attrib_binding , dummy_true , GLL, GLC, x , x , 2012) EXT(ARB_vertex_buffer_object , dummy_true , GLL, x , x , x , 2003) EXT(ARB_vertex_program , ARB_vertex_program , GLL, x , x , x , 2002) EXT(ARB_vertex_shader , ARB_vertex_shader , GLL, GLC, x , x , 2002) -EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010) EXT(ARB_vertex_type_10f_11f_11f_rev , ARB_vertex_type_10f_11f_11f_rev , GLL, GLC, x , x , 2013) EXT(ARB_vertex_type_2_10_10_10_rev , ARB_vertex_type_2_10_10_10_rev , GLL, GLC, x , x , 2009) EXT(ARB_viewport_array , ARB_viewport_array , x , GLC, x , x , 2010) EXT(ARB_window_pos , dummy_true , GLL, x , x , x , 2001) +EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) +EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002) 
+EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001) +EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006) +EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004) +EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002) +EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002) +EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006) + EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995) EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995) EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995) EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) +EXT(EXT_blend_func_extended , ARB_blend_func_extended , x , x , x , ES2, 2015) EXT(EXT_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) -EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015) -EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_blend_minmax , EXT_blend_minmax , GLL, x , ES1, ES2, 1995) EXT(EXT_blend_subtract , dummy_true , GLL, x , x , x , 1995) +EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015) +EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013) EXT(EXT_compiled_vertex_array , dummy_true , GLL, x , x , x , 1996) EXT(EXT_copy_texture , dummy_true , GLL, x , x , x , 1995) EXT(EXT_depth_bounds_test , EXT_depth_bounds_test , GLL, GLC, x , x , 2002) +EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_draw_buffers , dummy_true , x , x , x , ES2, 2012) EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006) EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) @@ -172,20 +206,21 @@ EXT(EXT_point_parameters , EXT_point_parameters EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995) EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014) 
EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009) +EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997) EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999) EXT(EXT_separate_shader_objects , dummy_true , x , x , x , ES2, 2013) EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997) -EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, ES1, 30, 2013) +EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013) +EXT(EXT_shader_samples_identical , EXT_shader_samples_identical , GLL, GLC, x , 31, 2015) EXT(EXT_shadow_funcs , ARB_shadow , GLL, x , x , x , 2002) EXT(EXT_stencil_two_side , EXT_stencil_two_side , GLL, x , x , x , 2001) EXT(EXT_stencil_wrap , dummy_true , GLL, x , x , x , 2002) EXT(EXT_subtexture , dummy_true , GLL, x , x , x , 1995) +EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) -EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) -EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , x , 2000) @@ -196,28 +231,66 @@ EXT(EXT_texture_env_combine , dummy_true EXT(EXT_texture_env_dot3 , EXT_texture_env_dot3 , GLL, x , x , x , 2000) EXT(EXT_texture_filter_anisotropic , EXT_texture_filter_anisotropic , GLL, GLC, ES1, ES2, 1999) EXT(EXT_texture_format_BGRA8888 , dummy_true , x , x , ES1, ES2, 2005) -EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) 
-EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_texture_integer , EXT_texture_integer , GLL, GLC, x , x , 2006) EXT(EXT_texture_lod_bias , dummy_true , GLL, x , ES1, x , 1999) EXT(EXT_texture_mirror_clamp , EXT_texture_mirror_clamp , GLL, GLC, x , x , 2004) EXT(EXT_texture_object , dummy_true , GLL, x , x , x , 1995) -EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004) -EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) -EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) +EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004) EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , x , 2006) +EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) +EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) EXT(EXT_texture_type_2_10_10_10_REV , dummy_true , x , x , x , ES2, 2008) EXT(EXT_timer_query , EXT_timer_query , GLL, GLC, x , x , 2006) EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011) EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011) -EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) EXT(EXT_vertex_array , dummy_true , GLL, x , x , x , 1995) -EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013) +EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008) +EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998) +EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996) +EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998) +EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) + +EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) + 
+EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014) +EXT(KHR_debug , dummy_true , GLL, GLC, ES1, ES2, 2012) +EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012) +EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012) + +EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002) +EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009) +EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000) +EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002) + +EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999) +EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008) +EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001) +EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011) +EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010) +EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001) +EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005) +EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999) +EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000) +EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001) +EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002) +EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011) +EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011) +EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999) +EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009) +EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999) +EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000) +EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010) + +EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, 
ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ +EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010) +EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010) EXT(OES_blend_equation_separate , EXT_blend_equation_separate , x , x , ES1, x , 2009) EXT(OES_blend_func_separate , EXT_blend_func_separate , x , x , ES1, x , 2009) EXT(OES_blend_subtract , dummy_true , x , x , ES1, x , 2009) @@ -230,9 +303,6 @@ EXT(OES_depth_texture , ARB_depth_texture EXT(OES_depth_texture_cube_map , OES_depth_texture_cube_map , x , x , x , ES2, 2012) EXT(OES_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014) EXT(OES_draw_texture , OES_draw_texture , x , x , ES1, x , 2004) -EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010) -EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ -EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010) EXT(OES_element_index_uint , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002) @@ -260,73 +330,17 @@ EXT(OES_texture_float_linear , OES_texture_float_linear EXT(OES_texture_half_float , OES_texture_half_float , x , x , x , ES2, 2005) EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005) EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005) -EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005) +EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010) - -EXT(KHR_debug , dummy_true , GLL, GLC, x , x , 2012) -EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , 
ES2, 2014) -EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012) -EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012) - - -EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999) -EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009) -EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009) -EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007) -EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013) -EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) -EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) -EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) -EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012) -EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012) -EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006) -EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002) -EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009) -EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002) -EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003) -EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002) -EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001) -EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006) -EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004) -EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002) -EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002) -EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006) -EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x 
, x , 1998) -EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996) -EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998) -EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) -EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) -EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002) -EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009) -EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000) -EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002) -EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999) -EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008) -EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001) -EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011) -EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010) -EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001) -EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005) -EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999) -EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000) -EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001) -EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002) -EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011) -EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011) -EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999) -EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009) -EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999) -EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000) -EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010) EXT(S3_s3tc , ANGLE_texture_compression_dxt , GLL, GLC, x , x , 1999) + 
EXT(SGIS_generate_mipmap , dummy_true , GLL, x , x , x , 1997) EXT(SGIS_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 1997) EXT(SGIS_texture_edge_clamp , dummy_true , GLL, x , x , x , 1997) EXT(SGIS_texture_lod , dummy_true , GLL, x , x , x , 1997) + EXT(SUN_multi_draw_arrays , dummy_true , GLL, x , x , x , 1999) #undef GLL #undef GLC diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c index 45f343d61c8..1ad939cfde6 100644 --- a/src/mesa/main/fog.c +++ b/src/mesa/main/fog.c @@ -190,7 +190,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params ) } if (ctx->Driver.Fogfv) { - (*ctx->Driver.Fogfv)( ctx, pname, params ); + ctx->Driver.Fogfv( ctx, pname, params ); } return; diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index fbc7b8f8602..9b22b91ac1b 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -330,6 +330,9 @@ descriptor=[ # GL_KHR_context_flush_control [ "CONTEXT_RELEASE_BEHAVIOR", "CONTEXT_ENUM(Const.ContextReleaseBehavior), NO_EXTRA" ], + +# blend_func_extended + [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], ]}, # GLES3 is not a typo. 
@@ -801,7 +804,6 @@ descriptor=[ # GL_ARB_robustness [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ], - [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], # GL_ARB_uniform_buffer_object [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ], diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index 9873fdbf1a4..87c5a3a194f 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -121,7 +121,7 @@ _mesa_GetString( GLenum name ) assert(ctx->Driver.GetString); { /* Give the driver the chance to handle this query */ - const GLubyte *str = (*ctx->Driver.GetString)(ctx, name); + const GLubyte *str = ctx->Driver.GetString(ctx, name); if (str) return str; } @@ -203,12 +203,18 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) { GET_CURRENT_CONTEXT(ctx); const GLuint clientUnit = ctx->Array.ActiveTexture; + const char *callerstr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetPointerv"; + else + callerstr = "glGetPointervKHR"; if (!params) return; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname)); + _mesa_debug(ctx, "%s %s\n", callerstr, _mesa_enum_to_string(pname)); switch (pname) { case GL_VERTEX_ARRAY_POINTER: @@ -268,10 +274,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) break; case GL_DEBUG_CALLBACK_FUNCTION_ARB: case GL_DEBUG_CALLBACK_USER_PARAM_ARB: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_pname; - else - *params = _mesa_get_debug_state_ptr(ctx, pname); + *params = _mesa_get_debug_state_ptr(ctx, pname); break; default: goto invalid_pname; @@ -280,7 +283,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) return; invalid_pname: - _mesa_error( ctx, GL_INVALID_ENUM, "glGetPointerv" ); + _mesa_error( ctx, GL_INVALID_ENUM, "%s", callerstr); return; } diff --git 
a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 95cbba4ed57..4a849fb090d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2193,6 +2193,7 @@ struct gl_ati_fragment_shader_state struct gl_subroutine_function { char *name; + int index; int num_compat_types; const struct glsl_type **types; }; @@ -3766,6 +3767,7 @@ struct gl_extensions GLboolean EXT_polygon_offset_clamp; GLboolean EXT_provoking_vertex; GLboolean EXT_shader_integer_mix; + GLboolean EXT_shader_samples_identical; GLboolean EXT_stencil_two_side; GLboolean EXT_texture_array; GLboolean EXT_texture_compression_latc; diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index 1019f893ba8..41f370ce485 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -243,13 +243,19 @@ _mesa_ObjectLabel(GLenum identifier, GLuint name, GLsizei length, const GLchar *label) { GET_CURRENT_CONTEXT(ctx); + const char *callerstr; char **labelPtr; - labelPtr = get_label_pointer(ctx, identifier, name, "glObjectLabel"); + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glObjectLabel"; + else + callerstr = "glObjectLabelKHR"; + + labelPtr = get_label_pointer(ctx, identifier, name, callerstr); if (!labelPtr) return; - set_label(ctx, labelPtr, label, length, "glObjectLabel"); + set_label(ctx, labelPtr, label, length, callerstr); } void GLAPIENTRY @@ -257,15 +263,21 @@ _mesa_GetObjectLabel(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label) { GET_CURRENT_CONTEXT(ctx); + const char *callerstr; char **labelPtr; + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetObjectLabel"; + else + callerstr = "glGetObjectLabelKHR"; + if (bufSize < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectLabel(bufSize = %d)", + _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr, bufSize); return; } - labelPtr = get_label_pointer(ctx, identifier, name, "glGetObjectLabel"); + labelPtr = get_label_pointer(ctx, identifier, name, callerstr); if 
(!labelPtr) return; @@ -276,17 +288,24 @@ void GLAPIENTRY _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label) { GET_CURRENT_CONTEXT(ctx); - char **labelPtr; struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + const char *callerstr; + char **labelPtr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glObjectPtrLabel"; + else + callerstr = "glObjectPtrLabelKHR"; if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glObjectPtrLabel (not a valid sync object)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", + callerstr); return; } labelPtr = &syncObj->Label; - set_label(ctx, labelPtr, label, length, "glObjectPtrLabel"); + set_label(ctx, labelPtr, label, length, callerstr); } void GLAPIENTRY @@ -294,17 +313,24 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label) { GET_CURRENT_CONTEXT(ctx); - char **labelPtr; struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr; + const char *callerstr; + char **labelPtr; + + if (_mesa_is_desktop_gl(ctx)) + callerstr = "glGetObjectPtrLabel"; + else + callerstr = "glGetObjectPtrLabelKHR"; if (bufSize < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel(bufSize = %d)", + _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr, bufSize); return; } if (!_mesa_validate_sync(ctx, syncObj)) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel (not a valid sync object)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)", + callerstr); return; } diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c index 863e3c1af32..c2f2b6399cb 100644 --- a/src/mesa/main/points.c +++ b/src/mesa/main/points.c @@ -209,7 +209,7 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params) } if (ctx->Driver.PointParameterfv) - (*ctx->Driver.PointParameterfv)(ctx, pname, params); + ctx->Driver.PointParameterfv(ctx, pname, params); } diff --git 
a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 58ba04153e6..79a91b5b6bd 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -661,6 +661,13 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, switch (res->Type) { case GL_ATOMIC_COUNTER_BUFFER: return RESOURCE_ATC(res) - shProg->AtomicBuffers; + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: + return RESOURCE_SUB(res)->index; case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: case GL_TRANSFORM_FEEDBACK_VARYING: @@ -1413,9 +1420,19 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline) for (idx = prev + 1; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) { if (shProg[idx]) { - if (!validate_io(shProg[prev]->_LinkedShaders[prev], - shProg[idx]->_LinkedShaders[idx])) - return false; + /* Since we now only validate precision, we can skip this step for + * desktop GLSL shaders, there precision qualifier is ignored. + * + * From OpenGL 4.50 Shading Language spec, section 4.7: + * "For the purposes of determining if an output from one shader + * stage matches an input of the next stage, the precision + * qualifier need not match." 
+ */ + if (shProg[prev]->IsES || shProg[idx]->IsES) { + if (!validate_io(shProg[prev]->_LinkedShaders[prev], + shProg[idx]->_LinkedShaders[idx])) + return false; + } prev = idx; } } diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am index bd7ab7365c0..d6977e20e85 100644 --- a/src/mesa/main/tests/Makefile.am +++ b/src/mesa/main/tests/Makefile.am @@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI main_test_SOURCES += \ dispatch_sanity.cpp \ mesa_formats.cpp \ + mesa_extensions.cpp \ program_state_string.cpp main_test_LDADD += \ diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index abe0f432572..97f81f932f6 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2421,6 +2421,11 @@ const struct function gles3_functions_possible[] = { { "glProgramUniform4uiEXT", 30, -1 }, { "glProgramUniform4uivEXT", 30, -1 }, + /* GL_EXT_blend_func_extended */ + { "glBindFragDataLocationIndexedEXT", 30, -1 }, + { "glGetFragDataIndexEXT", 30, -1 }, + { "glBindFragDataLocationEXT", 30, -1 }, + { NULL, 0, -1 } }; @@ -2509,5 +2514,8 @@ const struct function gles31_functions_possible[] = { /* GL_EXT_buffer_storage */ { "glBufferStorageEXT", 31, -1 }, + /* GL_EXT_blend_func_extended */ + { "glGetProgramResourceLocationIndexEXT", 31, -1 }, + { NULL, 0, -1 }, }; diff --git a/src/mesa/main/tests/mesa_extensions.cpp b/src/mesa/main/tests/mesa_extensions.cpp new file mode 100644 index 00000000000..0c7addd4282 --- /dev/null +++ b/src/mesa/main/tests/mesa_extensions.cpp @@ -0,0 +1,51 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, 
and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \name mesa_extensions.cpp + * + * Verify that the extensions table is sorted. + */ + +#include <gtest/gtest.h> +#include "util/macros.h" + +/** + * Debug/test: verify the extension table is alphabetically sorted. + */ +TEST(MesaExtensionsTest, AlphabeticallySorted) +{ + const char *ext_names[] = { + #define EXT(name_str, ...) 
#name_str, + #include "main/extensions_table.h" + #undef EXT + }; + + for (unsigned i = 0; i < ARRAY_SIZE(ext_names) - 1; ++i) { + const char *current_str = ext_names[i]; + const char *next_str = ext_names[i+1]; + + /* We expect the extension table to be alphabetically sorted */ + ASSERT_LT(strcmp(current_str, next_str), 0); + } +} diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index 091922161c5..93c680650bb 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -495,7 +495,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) /* Tell device driver about the new texture environment */ if (ctx->Driver.TexEnv) { - (*ctx->Driver.TexEnv)( ctx, target, pname, param ); + ctx->Driver.TexEnv(ctx, target, pname, param); } } diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index d9453e3a281..ac7599f9fd4 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1333,21 +1333,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, break; case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP_ARRAY: - /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: - * - * "The ETC2/EAC texture compression algorithm supports only - * two-dimensional images. If internalformat is an ETC2/EAC format, - * glCompressedTexImage3D will generate an INVALID_OPERATION error if - * target is not TEXTURE_2D_ARRAY." - * - * This should also be applicable for glTexStorage3D(). Other available - * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY. - */ - if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) - return write_error(error, GL_INVALID_OPERATION); - - target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; - /* From the KHR_texture_compression_astc_hdr spec: * * Add a second new column "3D Tex." 
which is empty for all non-ASTC @@ -1368,16 +1353,24 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, * 8.19 is *not* checked' * * The instances of <internalformat> above should say <target>. + * + * ETC2/EAC formats are the only alternative in GLES and thus such errors + * have already been handled by normal ETC2/EAC behavior. */ - /* Throw an INVALID_OPERATION error if the target is - * TEXTURE_CUBE_MAP_ARRAY and the format is not ASTC. + /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec: + * + * "The ETC2/EAC texture compression algorithm supports only + * two-dimensional images. If internalformat is an ETC2/EAC format, + * glCompressedTexImage3D will generate an INVALID_OPERATION error if + * target is not TEXTURE_2D_ARRAY." + * + * This should also be applicable for glTexStorage3D(). Other available + * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY. */ - if (target_can_be_compresed && - ctx->Extensions.KHR_texture_compression_astc_ldr && - layout != MESA_FORMAT_LAYOUT_ASTC) - return write_error(error, GL_INVALID_OPERATION); - + if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx)) + return write_error(error, GL_INVALID_OPERATION); + target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array; break; case GL_TEXTURE_3D: switch (layout) { @@ -1401,12 +1394,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, return write_error(error, GL_INVALID_OPERATION); break; default: - /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and - * the format is not ASTC. - * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info. 
- */ - if (ctx->Extensions.KHR_texture_compression_astc_ldr) - return write_error(error, GL_INVALID_OPERATION); break; } default: diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1099d79d834..c5d8c483429 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1920,6 +1920,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir) case ir_query_levels: assert(!"Unexpected ir_query_levels opcode"); break; + case ir_samples_identical: + unreachable("Unexpected ir_samples_identical opcode"); case ir_texture_samples: unreachable("Unexpected ir_texture_samples opcode"); } diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c index 1bb5be397ae..8fdf0e8497f 100644 --- a/src/mesa/state_tracker/st_cb_perfmon.c +++ b/src/mesa/state_tracker/st_cb_perfmon.c @@ -36,69 +36,24 @@ #include "pipe/p_screen.h" #include "util/u_memory.h" -/** - * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if - * the driver-specific query doesn't exist. - */ -static int -find_query_type(struct pipe_screen *screen, const char *name) -{ - int num_queries; - int type = -1; - int i; - - num_queries = screen->get_driver_query_info(screen, 0, NULL); - if (!num_queries) - return type; - - for (i = 0; i < num_queries; i++) { - struct pipe_driver_query_info info; - - if (!screen->get_driver_query_info(screen, i, &info)) - continue; - - if (!strncmp(info.name, name, strlen(name))) { - type = info.query_type; - break; - } - } - return type; -} - -/** - * Return TRUE if the underlying driver expose GPU counters. 
- */ -static bool -has_gpu_counters(struct pipe_screen *screen) -{ - int num_groups, gid; - - num_groups = screen->get_driver_query_group_info(screen, 0, NULL); - for (gid = 0; gid < num_groups; gid++) { - struct pipe_driver_query_group_info group_info; - - if (!screen->get_driver_query_group_info(screen, gid, &group_info)) - continue; - - if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU) - return true; - } - return false; -} - static bool init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { + struct st_context *st = st_context(ctx); struct st_perf_monitor_object *stm = st_perf_monitor_object(m); - struct pipe_screen *screen = st_context(ctx)->pipe->screen; - struct pipe_context *pipe = st_context(ctx)->pipe; + struct pipe_context *pipe = st->pipe; + unsigned *batch = NULL; + unsigned num_active_counters = 0; + unsigned max_batch_counters = 0; + unsigned num_batch_counters = 0; int gid, cid; - st_flush_bitmap_cache(st_context(ctx)); + st_flush_bitmap_cache(st); - /* Create a query for each active counter. */ + /* Determine the number of active counters. */ for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) { const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid]; + const struct st_perf_monitor_group *stg = &st->perfmon[gid]; if (m->ActiveGroups[gid] > g->MaxActiveCounters) { /* Maximum number of counters reached. Cannot start the session. 
*/ @@ -109,53 +64,96 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) return false; } - for (cid = 0; cid < g->NumCounters; cid++) { - const struct gl_perf_monitor_counter *c = &g->Counters[cid]; - struct st_perf_counter_object *cntr; - int query_type; + num_active_counters += m->ActiveGroups[gid]; + if (stg->has_batch) + max_batch_counters += m->ActiveGroups[gid]; + } - if (!BITSET_TEST(m->ActiveCounters[gid], cid)) - continue; + if (!num_active_counters) + return true; + + stm->active_counters = CALLOC(num_active_counters, + sizeof(*stm->active_counters)); + if (!stm->active_counters) + return false; - query_type = find_query_type(screen, c->Name); - assert(query_type != -1); + if (max_batch_counters) { + batch = CALLOC(max_batch_counters, sizeof(*batch)); + if (!batch) + return false; + } + + /* Create a query for each active counter. */ + for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) { + const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid]; + const struct st_perf_monitor_group *stg = &st->perfmon[gid]; + BITSET_WORD tmp; - cntr = CALLOC_STRUCT(st_perf_counter_object); - if (!cntr) - return false; + BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) { + const struct st_perf_monitor_counter *stc = &stg->counters[cid]; + struct st_perf_counter_object *cntr = + &stm->active_counters[stm->num_active_counters]; - cntr->query = pipe->create_query(pipe, query_type, 0); cntr->id = cid; cntr->group_id = gid; - - list_addtail(&cntr->list, &stm->active_counters); + if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) { + cntr->batch_index = num_batch_counters; + batch[num_batch_counters++] = stc->query_type; + } else { + cntr->query = pipe->create_query(pipe, stc->query_type, 0); + if (!cntr->query) + goto fail; + } + ++stm->num_active_counters; } } + + /* Create the batch query. 
*/ + if (num_batch_counters) { + stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters, + batch); + stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0])); + if (!stm->batch_query || !stm->batch_result) + goto fail; + } + + FREE(batch); return true; + +fail: + FREE(batch); + return false; } static void reset_perf_monitor(struct st_perf_monitor_object *stm, struct pipe_context *pipe) { - struct st_perf_counter_object *cntr, *tmp; + unsigned i; - LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) { - if (cntr->query) - pipe->destroy_query(pipe, cntr->query); - list_del(&cntr->list); - free(cntr); + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query) + pipe->destroy_query(pipe, query); } + FREE(stm->active_counters); + stm->active_counters = NULL; + stm->num_active_counters = 0; + + if (stm->batch_query) { + pipe->destroy_query(pipe, stm->batch_query); + stm->batch_query = NULL; + } + FREE(stm->batch_result); + stm->batch_result = NULL; } static struct gl_perf_monitor_object * st_NewPerfMonitor(struct gl_context *ctx) { struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object); - if (stq) { - list_inithead(&stq->active_counters); + if (stq) return &stq->base; - } return NULL; } @@ -174,9 +172,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; - if (LIST_IS_EMPTY(&stm->active_counters)) { + if (!stm->num_active_counters) { /* Create a query for each active counter before starting * a new monitoring session. */ if (!init_perf_monitor(ctx, m)) @@ -184,10 +182,15 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) } /* Start the query for each active counter. 
*/ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { - if (!pipe->begin_query(pipe, cntr->query)) + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query && !pipe->begin_query(pipe, query)) goto fail; } + + if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query)) + goto fail; + return true; fail: @@ -201,11 +204,17 @@ st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m) { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; /* Stop the query for each active counter. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) - pipe->end_query(pipe, cntr->query); + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; + if (query) + pipe->end_query(pipe, query); + } + + if (stm->batch_query) + pipe->end_query(pipe, stm->batch_query); } static void @@ -229,20 +238,26 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx, { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; - if (LIST_IS_EMPTY(&stm->active_counters)) + if (!stm->num_active_counters) return false; /* The result of a monitoring session is only available if the query of * each active counter is idle. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { + for (i = 0; i < stm->num_active_counters; ++i) { + struct pipe_query *query = stm->active_counters[i].query; union pipe_query_result result; - if (!pipe->get_query_result(pipe, cntr->query, FALSE, &result)) { + if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) { /* The query is busy. 
*/ return false; } } + + if (stm->batch_query && + !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result)) + return false; + return true; } @@ -255,7 +270,7 @@ st_GetPerfMonitorResult(struct gl_context *ctx, { struct st_perf_monitor_object *stm = st_perf_monitor_object(m); struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_perf_counter_object *cntr; + unsigned i; /* Copy data to the supplied array (data). * @@ -263,9 +278,15 @@ st_GetPerfMonitorResult(struct gl_context *ctx, * active counter. The API allows counters to appear in any order. */ GLsizei offset = 0; + bool have_batch_query = false; + + if (stm->batch_query) + have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE, + stm->batch_result); /* Read query results for each active counter. */ - LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) { + for (i = 0; i < stm->num_active_counters; ++i) { + struct st_perf_counter_object *cntr = &stm->active_counters[i]; union pipe_query_result result = { 0 }; int gid, cid; GLenum type; @@ -274,8 +295,14 @@ st_GetPerfMonitorResult(struct gl_context *ctx, gid = cntr->group_id; type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type; - if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result)) - continue; + if (cntr->query) { + if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result)) + continue; + } else { + if (!have_batch_query) + continue; + result.batch[0] = stm->batch_result->batch[cntr->batch_index]; + } data[offset++] = gid; data[offset++] = cid; @@ -307,18 +334,13 @@ st_init_perfmon(struct st_context *st) struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor; struct pipe_screen *screen = st->pipe->screen; struct gl_perf_monitor_group *groups = NULL; + struct st_perf_monitor_group *stgroups = NULL; int num_counters, num_groups; int gid, cid; if (!screen->get_driver_query_info || !screen->get_driver_query_group_info) return false; - if (!has_gpu_counters(screen)) { - /* According to the spec, 
GL_AMD_performance_monitor must only - * expose GPU counters. */ - return false; - } - /* Get the number of available queries. */ num_counters = screen->get_driver_query_info(screen, 0, NULL); if (!num_counters) @@ -331,29 +353,37 @@ st_init_perfmon(struct st_context *st) if (!groups) return false; + stgroups = CALLOC(num_groups, sizeof(*stgroups)); + if (!stgroups) + goto fail_only_groups; + for (gid = 0; gid < num_groups; gid++) { struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups]; + struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups]; struct pipe_driver_query_group_info group_info; struct gl_perf_monitor_counter *counters = NULL; + struct st_perf_monitor_counter *stcounters = NULL; if (!screen->get_driver_query_group_info(screen, gid, &group_info)) continue; - if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU) - continue; - g->Name = group_info.name; g->MaxActiveCounters = group_info.max_active_queries; - g->NumCounters = 0; - g->Counters = NULL; if (group_info.num_queries) counters = CALLOC(group_info.num_queries, sizeof(*counters)); if (!counters) goto fail; + g->Counters = counters; + + stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters)); + if (!stcounters) + goto fail; + stg->counters = stcounters; for (cid = 0; cid < num_counters; cid++) { struct gl_perf_monitor_counter *c = &counters[g->NumCounters]; + struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters]; struct pipe_driver_query_info info; if (!screen->get_driver_query_info(screen, cid, &info)) @@ -364,6 +394,9 @@ st_init_perfmon(struct st_context *st) c->Name = info.name; switch (info.type) { case PIPE_DRIVER_QUERY_TYPE_UINT64: + case PIPE_DRIVER_QUERY_TYPE_BYTES: + case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS: + case PIPE_DRIVER_QUERY_TYPE_HZ: c->Minimum.u64 = 0; c->Maximum.u64 = info.max_value.u64 ? 
info.max_value.u64 : -1; c->Type = GL_UNSIGNED_INT64_AMD; @@ -386,18 +419,28 @@ st_init_perfmon(struct st_context *st) default: unreachable("Invalid driver query type!"); } + + stc->query_type = info.query_type; + stc->flags = info.flags; + if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) + stg->has_batch = true; + g->NumCounters++; } - g->Counters = counters; perfmon->NumGroups++; } perfmon->Groups = groups; + st->perfmon = stgroups; return true; fail: - for (gid = 0; gid < num_groups; gid++) + for (gid = 0; gid < num_groups; gid++) { + FREE(stgroups[gid].counters); FREE((void *)groups[gid].Counters); + } + FREE(stgroups); +fail_only_groups: FREE(groups); return false; } @@ -408,8 +451,11 @@ st_destroy_perfmon(struct st_context *st) struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor; int gid; - for (gid = 0; gid < perfmon->NumGroups; gid++) + for (gid = 0; gid < perfmon->NumGroups; gid++) { + FREE(st->perfmon[gid].counters); FREE((void *)perfmon->Groups[gid].Counters); + } + FREE(st->perfmon); FREE((void *)perfmon->Groups); } diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h index 0b195de47fe..29732866bf8 100644 --- a/src/mesa/state_tracker/st_cb_perfmon.h +++ b/src/mesa/state_tracker/st_cb_perfmon.h @@ -26,21 +26,41 @@ #include "util/list.h" +struct st_perf_counter_object +{ + struct pipe_query *query; + int id; + int group_id; + unsigned batch_index; +}; + /** * Subclass of gl_perf_monitor_object */ struct st_perf_monitor_object { struct gl_perf_monitor_object base; - struct list_head active_counters; + unsigned num_active_counters; + struct st_perf_counter_object *active_counters; + + struct pipe_query *batch_query; + union pipe_query_result *batch_result; }; -struct st_perf_counter_object +/** + * Extra data per counter, supplementing gl_perf_monitor_counter with + * driver-specific information. 
+ */ +struct st_perf_monitor_counter { - struct list_head list; - struct pipe_query *query; - int id; - int group_id; + unsigned query_type; + unsigned flags; +}; + +struct st_perf_monitor_group +{ + struct st_perf_monitor_counter *counters; + bool has_batch; }; /** diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index c243f5cd966..60a9a4bb0d5 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -46,6 +46,7 @@ struct draw_stage; struct gen_mipmap_state; struct st_context; struct st_fragment_program; +struct st_perf_monitor_group; struct u_upload_mgr; @@ -217,6 +218,8 @@ struct st_context int32_t read_stamp; struct st_config_options options; + + struct st_perf_monitor_group *perfmon; }; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99e96e1f3ae..a2418e28a91 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -343,7 +343,7 @@ struct st_extension_cap_mapping { struct st_extension_format_mapping { int extension_offset[2]; - enum pipe_format format[8]; + enum pipe_format format[32]; /* If TRUE, at least one format must be supported for the extensions to be * advertised. If FALSE, all the formats must be supported. 
*/ @@ -569,6 +569,36 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_FORMAT_BPTC_RGB_FLOAT, PIPE_FORMAT_BPTC_RGB_UFLOAT } }, + { { o(KHR_texture_compression_astc_ldr) }, + { PIPE_FORMAT_ASTC_4x4, + PIPE_FORMAT_ASTC_5x4, + PIPE_FORMAT_ASTC_5x5, + PIPE_FORMAT_ASTC_6x5, + PIPE_FORMAT_ASTC_6x6, + PIPE_FORMAT_ASTC_8x5, + PIPE_FORMAT_ASTC_8x6, + PIPE_FORMAT_ASTC_8x8, + PIPE_FORMAT_ASTC_10x5, + PIPE_FORMAT_ASTC_10x6, + PIPE_FORMAT_ASTC_10x8, + PIPE_FORMAT_ASTC_10x10, + PIPE_FORMAT_ASTC_12x10, + PIPE_FORMAT_ASTC_12x12, + PIPE_FORMAT_ASTC_4x4_SRGB, + PIPE_FORMAT_ASTC_5x4_SRGB, + PIPE_FORMAT_ASTC_5x5_SRGB, + PIPE_FORMAT_ASTC_6x5_SRGB, + PIPE_FORMAT_ASTC_6x6_SRGB, + PIPE_FORMAT_ASTC_8x5_SRGB, + PIPE_FORMAT_ASTC_8x6_SRGB, + PIPE_FORMAT_ASTC_8x8_SRGB, + PIPE_FORMAT_ASTC_10x5_SRGB, + PIPE_FORMAT_ASTC_10x6_SRGB, + PIPE_FORMAT_ASTC_10x8_SRGB, + PIPE_FORMAT_ASTC_10x10_SRGB, + PIPE_FORMAT_ASTC_12x10_SRGB, + PIPE_FORMAT_ASTC_12x12_SRGB } }, + { { o(EXT_texture_shared_exponent) }, { PIPE_FORMAT_R9G9B9E5_FLOAT } }, diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 144b7d6f659..2b92bade440 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -465,6 +465,64 @@ st_mesa_format_to_pipe_format(struct st_context *st, mesa_format mesaFormat) case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: return st->has_etc2 ? 
PIPE_FORMAT_ETC2_SRGB8A1 : PIPE_FORMAT_B8G8R8A8_SRGB; + case MESA_FORMAT_RGBA_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4; + case MESA_FORMAT_RGBA_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4; + case MESA_FORMAT_RGBA_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5; + case MESA_FORMAT_RGBA_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5; + case MESA_FORMAT_RGBA_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6; + case MESA_FORMAT_RGBA_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5; + case MESA_FORMAT_RGBA_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6; + case MESA_FORMAT_RGBA_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8; + case MESA_FORMAT_RGBA_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5; + case MESA_FORMAT_RGBA_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6; + case MESA_FORMAT_RGBA_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8; + case MESA_FORMAT_RGBA_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10; + case MESA_FORMAT_RGBA_ASTC_12x10: + return PIPE_FORMAT_ASTC_12x10; + case MESA_FORMAT_RGBA_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12; + + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4: + return PIPE_FORMAT_ASTC_4x4_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4: + return PIPE_FORMAT_ASTC_5x4_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5: + return PIPE_FORMAT_ASTC_5x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5: + return PIPE_FORMAT_ASTC_6x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6: + return PIPE_FORMAT_ASTC_6x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5: + return PIPE_FORMAT_ASTC_8x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6: + return PIPE_FORMAT_ASTC_8x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8: + return PIPE_FORMAT_ASTC_8x8_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5: + return PIPE_FORMAT_ASTC_10x5_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6: + return PIPE_FORMAT_ASTC_10x6_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8: + return PIPE_FORMAT_ASTC_10x8_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10: + return PIPE_FORMAT_ASTC_10x10_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10: + return 
PIPE_FORMAT_ASTC_12x10_SRGB; + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12: + return PIPE_FORMAT_ASTC_12x12_SRGB; + default: return PIPE_FORMAT_NONE; } @@ -883,6 +941,64 @@ st_pipe_format_to_mesa_format(enum pipe_format format) case PIPE_FORMAT_ETC2_RG11_SNORM: return MESA_FORMAT_ETC2_SIGNED_RG11_EAC; + case PIPE_FORMAT_ASTC_4x4: + return MESA_FORMAT_RGBA_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4: + return MESA_FORMAT_RGBA_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5: + return MESA_FORMAT_RGBA_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5: + return MESA_FORMAT_RGBA_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6: + return MESA_FORMAT_RGBA_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5: + return MESA_FORMAT_RGBA_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6: + return MESA_FORMAT_RGBA_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8: + return MESA_FORMAT_RGBA_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5: + return MESA_FORMAT_RGBA_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6: + return MESA_FORMAT_RGBA_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8: + return MESA_FORMAT_RGBA_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10: + return MESA_FORMAT_RGBA_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10: + return MESA_FORMAT_RGBA_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12: + return MESA_FORMAT_RGBA_ASTC_12x12; + + case PIPE_FORMAT_ASTC_4x4_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4; + case PIPE_FORMAT_ASTC_5x4_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4; + case PIPE_FORMAT_ASTC_5x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5; + case PIPE_FORMAT_ASTC_6x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5; + case PIPE_FORMAT_ASTC_6x6_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6; + case PIPE_FORMAT_ASTC_8x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5; + case PIPE_FORMAT_ASTC_8x6_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6; + case PIPE_FORMAT_ASTC_8x8_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8; + case PIPE_FORMAT_ASTC_10x5_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5; + case PIPE_FORMAT_ASTC_10x6_SRGB: + return 
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6; + case PIPE_FORMAT_ASTC_10x8_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8; + case PIPE_FORMAT_ASTC_10x10_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10; + case PIPE_FORMAT_ASTC_12x10_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10; + case PIPE_FORMAT_ASTC_12x12_SRGB: + return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12; + default: return MESA_FORMAT_NONE; } @@ -1386,6 +1502,121 @@ static const struct format_mapping format_map[] = { { PIPE_FORMAT_BPTC_RGB_UFLOAT, 0 }, }, + /* ASTC */ + { + { GL_COMPRESSED_RGBA_ASTC_4x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_4x4, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x4, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_5x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_6x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_8x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x8, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x5, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x6, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x8, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_10x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x10, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_12x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x10, 0}, + }, + { + { GL_COMPRESSED_RGBA_ASTC_12x12_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x12, 0}, + }, + + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_4x4_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x4_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_5x5_SRGB, 0}, + }, + { + { 
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_6x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_8x8_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x5_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x6_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x8_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_10x10_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x10_SRGB, 0}, + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 0 }, + { PIPE_FORMAT_ASTC_12x12_SRGB, 0}, + }, + /* signed/unsigned integer formats. 
*/ { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3ad1afdecda..40c77258de7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3236,6 +3236,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) case ir_texture_samples: opcode = TGSI_OPCODE_TXQS; break; + case ir_samples_identical: + unreachable("Unexpected ir_samples_identical opcode"); } if (ir->projector) { diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 1193d1e7a5d..3d9e0705626 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -125,17 +125,7 @@ bool anv_is_scalar_shader_stage(const struct brw_compiler *compiler, VkShaderStage stage) { - switch (stage) { - case VK_SHADER_STAGE_VERTEX: - return compiler->scalar_vs; - case VK_SHADER_STAGE_GEOMETRY: - return false; - case VK_SHADER_STAGE_FRAGMENT: - case VK_SHADER_STAGE_COMPUTE: - return true; - default: - unreachable("Unsupported shader stage"); - } + return compiler->scalar_stage[vk_shader_stage_to_mesa_stage[stage]]; } /* Eventually, this will become part of anv_CreateShader. Unfortunately, @@ -187,8 +177,7 @@ anv_shader_compile_to_nir(struct anv_device *device, } assert(entrypoint != NULL); - brw_preprocess_nir(nir, &device->info, - anv_is_scalar_shader_stage(compiler, vk_stage)); + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); nir_shader_gather_info(nir, entrypoint); @@ -411,7 +400,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, prog_data->binding_table.image_start = bias; /* Finish the optimization and compilation process */ - brw_postprocess_nir(nir, &pipeline->device->info, + nir = brw_lower_nir(nir, &pipeline->device->info, NULL, anv_is_scalar_shader_stage(compiler, stage)); /* nir_lower_io will only handle the push constants; we need to set this |