summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2015-11-23 14:03:47 -0800
committerJason Ekstrand <[email protected]>2015-11-23 14:03:47 -0800
commit179fc4aae8f782453f0488e8dd508f9a01117376 (patch)
tree5f0cc77b30d86b581fb968a71ba83c5e4c2546d7 /src
parente14b2c76b40398a61f45f5d058079641661a66cb (diff)
parentd9b8fde963a53d4e06570d8bece97f806714507a (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in nir cloning and some much-needed upstream refactors.
Diffstat (limited to 'src')
-rw-r--r--src/egl/Makefile.am15
-rw-r--r--src/egl/drivers/dri2/egl_dri2.c118
-rw-r--r--src/egl/drivers/dri2/egl_dri2.h19
-rw-r--r--src/egl/drivers/dri2/platform_android.c1
-rw-r--r--src/egl/drivers/dri2/platform_drm.c1
-rw-r--r--src/egl/drivers/dri2/platform_wayland.c2
-rw-r--r--src/egl/drivers/dri2/platform_x11.c125
-rw-r--r--src/egl/drivers/dri2/platform_x11_dri3.c547
-rw-r--r--src/egl/drivers/dri2/platform_x11_dri3.h41
-rwxr-xr-xsrc/egl/egl-symbols-check55
-rw-r--r--src/gallium/Android.mk1
-rw-r--r--src/gallium/Automake.inc6
-rw-r--r--src/gallium/Makefile.am6
-rw-r--r--src/gallium/SConscript1
-rw-r--r--src/gallium/auxiliary/Makefile.am14
-rw-r--r--src/gallium/auxiliary/Makefile.sources2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c7
-rw-r--r--src/gallium/auxiliary/hud/hud_context.c59
-rw-r--r--src/gallium/auxiliary/hud/hud_driver_query.c271
-rw-r--r--src/gallium/auxiliary/hud/hud_private.h13
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c29
-rw-r--r--src/gallium/auxiliary/os/os_process.c47
-rw-r--r--src/gallium/auxiliary/pipe-loader/Android.mk49
-rw-r--r--src/gallium/auxiliary/pipe-loader/Makefile.am32
-rw-r--r--src/gallium/auxiliary/pipe-loader/SConscript33
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader.c12
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader.h22
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c179
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h3
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c223
-rw-r--r--src/gallium/auxiliary/target-helpers/drm_helper.h275
-rw-r--r--src/gallium/auxiliary/target-helpers/drm_helper_public.h37
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_drm_helper.h531
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h65
-rw-r--r--src/gallium/auxiliary/util/u_dl.c2
-rw-r--r--src/gallium/auxiliary/util/u_format.csv30
-rw-r--r--src/gallium/auxiliary/util/u_format.h65
-rw-r--r--src/gallium/auxiliary/util/u_format_fake.c37
-rw-r--r--src/gallium/auxiliary/util/u_format_fake.h66
-rw-r--r--src/gallium/auxiliary/util/u_format_pack.py2
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py10
-rw-r--r--src/gallium/auxiliary/vl/vl_winsys.h37
-rw-r--r--src/gallium/auxiliary/vl/vl_winsys_dri.c108
-rw-r--r--src/gallium/auxiliary/vl/vl_winsys_drm.c42
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h56
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c60
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.c24
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.h1
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c23
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h80
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.c27
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c11
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c132
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_format.c147
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c12
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h1
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.c2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c56
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.h3
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h23
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c8
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.c11
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c221
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c36
-rw-r--r--src/gallium/drivers/freedreno/freedreno_texture.c34
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c56
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h6
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp46
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c320
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h444
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h24
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c27
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_push.c42
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c77
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.h6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c47
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.h16
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c207
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c417
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c65
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h19
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c99
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c15
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c17
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c12
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources2
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c59
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h18
-rw-r--r--src/gallium/drivers/radeon/r600_query.c1017
-rw-r--r--src/gallium/drivers/radeon/r600_query.h136
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c24
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.h3
-rw-r--r--src/gallium/drivers/radeon/radeon_vce_52.c242
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c28
-rw-r--r--src/gallium/drivers/softpipe/Automake.inc5
-rw-r--r--src/gallium/drivers/svga/svga_context.h15
-rw-r--r--src/gallium/drivers/svga/svga_format.c148
-rw-r--r--src/gallium/drivers/svga/svga_format.h4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c9
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c5
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c4
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c6
-rw-r--r--src/gallium/drivers/svga/svga_screen.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_sampler.c3
-rw-r--r--src/gallium/drivers/trace/tr_screen.c3
-rw-r--r--src/gallium/drivers/vc4/Automake.inc4
-rw-r--r--src/gallium/drivers/vc4/Makefile.am1
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c7
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c56
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_reorder_uniforms.c26
-rw-r--r--src/gallium/include/pipe/p_context.h19
-rw-r--r--src/gallium/include/pipe/p_defines.h36
-rw-r--r--src/gallium/include/pipe/p_format.h30
-rw-r--r--src/gallium/include/state_tracker/drm_driver.h6
-rw-r--r--src/gallium/include/state_tracker/sw_driver.h21
-rw-r--r--src/gallium/state_trackers/clover/Makefile.am2
-rw-r--r--src/gallium/state_trackers/clover/core/device.cpp2
-rw-r--r--src/gallium/state_trackers/dri/Android.mk3
-rw-r--r--src/gallium/state_trackers/dri/Makefile.am7
-rw-r--r--src/gallium/state_trackers/dri/SConscript4
-rw-r--r--src/gallium/state_trackers/dri/dri2.c42
-rw-r--r--src/gallium/state_trackers/dri/dri_screen.c7
-rw-r--r--src/gallium/state_trackers/dri/drisw.c12
-rw-r--r--src/gallium/state_trackers/omx/entrypoint.c37
-rw-r--r--src/gallium/state_trackers/va/context.c23
-rw-r--r--src/gallium/state_trackers/va/picture.c2
-rw-r--r--src/gallium/state_trackers/va/surface.c13
-rw-r--r--src/gallium/state_trackers/vdpau/device.c6
-rw-r--r--src/gallium/state_trackers/vdpau/presentation.c18
-rw-r--r--src/gallium/state_trackers/xa/Makefile.am9
-rw-r--r--src/gallium/state_trackers/xa/xa_tracker.c18
-rw-r--r--src/gallium/state_trackers/xvmc/context.c12
-rw-r--r--src/gallium/state_trackers/xvmc/surface.c13
-rw-r--r--src/gallium/targets/d3dadapter9/Makefile.am24
-rw-r--r--src/gallium/targets/d3dadapter9/drm.c83
-rw-r--r--src/gallium/targets/dri/Android.mk3
-rw-r--r--src/gallium/targets/dri/Makefile.am11
-rw-r--r--src/gallium/targets/dri/SConscript2
-rw-r--r--src/gallium/targets/dri/target.c165
-rw-r--r--src/gallium/targets/omx/Makefile.am10
-rw-r--r--src/gallium/targets/omx/target.c2
-rw-r--r--src/gallium/targets/opencl/Makefile.am3
-rw-r--r--src/gallium/targets/pipe-loader/Makefile.am5
-rw-r--r--src/gallium/targets/pipe-loader/pipe.sym2
-rw-r--r--src/gallium/targets/pipe-loader/pipe_swrast.c34
-rw-r--r--src/gallium/targets/va/Makefile.am10
-rw-r--r--src/gallium/targets/va/target.c2
-rw-r--r--src/gallium/targets/vdpau/Makefile.am10
-rw-r--r--src/gallium/targets/vdpau/target.c2
-rw-r--r--src/gallium/targets/xa/Makefile.am10
-rw-r--r--src/gallium/targets/xa/target.c2
-rw-r--r--src/gallium/targets/xvmc/Makefile.am10
-rw-r--r--src/gallium/targets/xvmc/target.c2
-rw-r--r--src/gallium/tests/trivial/Makefile.am9
-rw-r--r--src/gallium/tests/trivial/compute.c2
-rw-r--r--src/gallium/tests/trivial/quad-tex.c2
-rw-r--r--src/gallium/tests/trivial/tri.c2
-rw-r--r--src/glsl/Android.gen.mk3
-rw-r--r--src/glsl/Makefile.sources3
-rw-r--r--src/glsl/ast.h57
-rw-r--r--src/glsl/ast_to_hir.cpp1055
-rw-r--r--src/glsl/ast_type.cpp153
-rw-r--r--src/glsl/builtin_functions.cpp59
-rw-r--r--src/glsl/builtin_variables.cpp70
-rw-r--r--src/glsl/glcpp/glcpp-parse.y5
-rw-r--r--src/glsl/glsl_parser.yy117
-rw-r--r--src/glsl/glsl_parser_extras.cpp54
-rw-r--r--src/glsl/glsl_parser_extras.h7
-rw-r--r--src/glsl/ir.cpp11
-rw-r--r--src/glsl/ir.h4
-rw-r--r--src/glsl/ir_clone.cpp2
-rw-r--r--src/glsl/ir_equals.cpp10
-rw-r--r--src/glsl/ir_hv_accept.cpp1
-rw-r--r--src/glsl/ir_print_visitor.cpp10
-rw-r--r--src/glsl/ir_rvalue_visitor.cpp1
-rw-r--r--src/glsl/link_varyings.cpp46
-rw-r--r--src/glsl/linker.cpp33
-rw-r--r--src/glsl/nir/glsl_to_nir.cpp41
-rw-r--r--src/glsl/nir/glsl_types.cpp1
-rw-r--r--src/glsl/nir/glsl_types.h5
-rw-r--r--src/glsl/nir/nir.c79
-rw-r--r--src/glsl/nir/nir.h79
-rw-r--r--src/glsl/nir/nir_builder.h23
-rw-r--r--src/glsl/nir/nir_clone.c674
-rw-r--r--src/glsl/nir/nir_constant_expressions.py2
-rw-r--r--src/glsl/nir/nir_intrinsics.h1
-rw-r--r--src/glsl/nir/nir_lower_clip.c2
-rw-r--r--src/glsl/nir/nir_lower_idiv.c8
-rw-r--r--src/glsl/nir/nir_lower_io.c2
-rw-r--r--src/glsl/nir/nir_lower_tex.c87
-rw-r--r--src/glsl/nir/nir_lower_two_sided_color.c2
-rw-r--r--src/glsl/nir/nir_metadata.c36
-rw-r--r--src/glsl/nir/nir_opcodes.py82
-rw-r--r--src/glsl/nir/nir_opt_copy_propagate.c7
-rw-r--r--src/glsl/nir/nir_print.c14
-rw-r--r--src/glsl/nir/nir_search.c4
-rw-r--r--src/glsl/nir/nir_validate.c18
-rw-r--r--src/glsl/nir/spirv_to_nir.c2
-rw-r--r--src/glsl/opt_tree_grafting.cpp1
-rw-r--r--src/glx/Makefile.am2
-rw-r--r--src/glx/dri3_glx.c1407
-rw-r--r--src/glx/dri3_priv.h94
-rw-r--r--src/loader/Makefile.am17
-rw-r--r--src/loader/loader_dri3_helper.c1396
-rw-r--r--src/loader/loader_dri3_helper.h241
-rw-r--r--src/mapi/glapi/gen/EXT_gpu_shader4.xml3
-rw-r--r--src/mapi/glapi/gen/es_EXT.xml26
-rw-r--r--src/mesa/drivers/common/meta_generate_mipmap.c13
-rw-r--r--src/mesa/drivers/dri/common/xmlconfig.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h32
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c135
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h16
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp178
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_builder.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp56
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp77
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp343
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp49
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp278
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_inst.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_link.cpp11
-rw-r--r--src/mesa/drivers/dri/i965/brw_meta_fast_clear.c181
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c330
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c15
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h53
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp42
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_surface_formats.c524
-rw-r--r--src/mesa/drivers/dri/i965/brw_surface_formats.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp85
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_builder.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp51
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp127
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp191
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp78
-rw-r--r--src/mesa/drivers/dri/i965/gen6_queryobj.c16
-rw-r--r--src/mesa/drivers/dri/i965/gen7_urb.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c16
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.c8
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.h2
-rw-r--r--src/mesa/drivers/dri/i965/intel_extensions.c1
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c32
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.h13
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp30
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp38
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp26
-rw-r--r--src/mesa/drivers/dri/r200/r200_tex.h4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex.h2
-rw-r--r--src/mesa/main/blend.c10
-rw-r--r--src/mesa/main/buffers.c2
-rw-r--r--src/mesa/main/enable.c10
-rw-r--r--src/mesa/main/errors.c39
-rw-r--r--src/mesa/main/extensions.c71
-rw-r--r--src/mesa/main/extensions_table.h178
-rw-r--r--src/mesa/main/fog.c2
-rw-r--r--src/mesa/main/get_hash_params.py4
-rw-r--r--src/mesa/main/getstring.c17
-rw-r--r--src/mesa/main/mtypes.h2
-rw-r--r--src/mesa/main/objectlabel.c46
-rw-r--r--src/mesa/main/points.c2
-rw-r--r--src/mesa/main/shader_query.cpp23
-rw-r--r--src/mesa/main/tests/Makefile.am1
-rw-r--r--src/mesa/main/tests/dispatch_sanity.cpp8
-rw-r--r--src/mesa/main/tests/mesa_extensions.cpp51
-rw-r--r--src/mesa/main/texenv.c2
-rw-r--r--src/mesa/main/teximage.c43
-rw-r--r--src/mesa/program/ir_to_mesa.cpp2
-rw-r--r--src/mesa/state_tracker/st_cb_perfmon.c258
-rw-r--r--src/mesa/state_tracker/st_cb_perfmon.h32
-rw-r--r--src/mesa/state_tracker/st_context.h3
-rw-r--r--src/mesa/state_tracker/st_extensions.c32
-rw-r--r--src/mesa/state_tracker/st_format.c231
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp2
-rw-r--r--src/vulkan/anv_pipeline.c17
319 files changed, 13179 insertions, 6167 deletions
diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index 5c2ba301ffb..6953d44e607 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -47,12 +47,21 @@ libEGL_la_LDFLAGS = \
$(LD_NO_UNDEFINED)
dri2_backend_FILES =
+dri3_backend_FILES =
if HAVE_EGL_PLATFORM_X11
AM_CFLAGS += -DHAVE_X11_PLATFORM
AM_CFLAGS += $(XCB_DRI2_CFLAGS)
libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
dri2_backend_FILES += drivers/dri2/platform_x11.c
+
+if HAVE_DRI3
+dri3_backend_FILES += \
+ drivers/dri2/platform_x11_dri3.c \
+ drivers/dri2/platform_x11_dri3.h
+
+libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
+endif
endif
if HAVE_EGL_PLATFORM_WAYLAND
@@ -88,7 +97,8 @@ AM_CFLAGS += \
libEGL_la_SOURCES += \
$(dri2_backend_core_FILES) \
- $(dri2_backend_FILES)
+ $(dri2_backend_FILES) \
+ $(dri3_backend_FILES)
libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la
libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
@@ -111,7 +121,10 @@ egl_HEADERS = \
$(top_srcdir)/include/EGL/eglmesaext.h \
$(top_srcdir)/include/EGL/eglplatform.h
+TESTS = egl-symbols-check
+
EXTRA_DIST = \
+ egl-symbols-check \
SConscript \
drivers/haiku \
docs \
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 4cc5f231333..d34b16119e2 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -352,6 +352,12 @@ struct dri2_extension_match {
int offset;
};
+static struct dri2_extension_match dri3_driver_extensions[] = {
+ { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
+ { __DRI_IMAGE_DRIVER, 1, offsetof(struct dri2_egl_display, image_driver) },
+ { NULL, 0, 0 }
+};
+
static struct dri2_extension_match dri2_driver_extensions[] = {
{ __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
{ __DRI_DRI2, 2, offsetof(struct dri2_egl_display, dri2) },
@@ -385,13 +391,13 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
void *field;
for (i = 0; extensions[i]; i++) {
- _eglLog(_EGL_DEBUG, "DRI2: found extension `%s'", extensions[i]->name);
+ _eglLog(_EGL_DEBUG, "found extension `%s'", extensions[i]->name);
for (j = 0; matches[j].name; j++) {
if (strcmp(extensions[i]->name, matches[j].name) == 0 &&
extensions[i]->version >= matches[j].version) {
field = ((char *) dri2_dpy + matches[j].offset);
*(const __DRIextension **) field = extensions[i];
- _eglLog(_EGL_INFO, "DRI2: found extension %s version %d",
+ _eglLog(_EGL_INFO, "found extension %s version %d",
extensions[i]->name, extensions[i]->version);
}
}
@@ -400,7 +406,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
for (j = 0; matches[j].name; j++) {
field = ((char *) dri2_dpy + matches[j].offset);
if (*(const __DRIextension **) field == NULL) {
- _eglLog(_EGL_WARNING, "DRI2: did not find extension %s version %d",
+ _eglLog(_EGL_WARNING, "did not find extension %s version %d",
matches[j].name, matches[j].version);
ret = EGL_FALSE;
}
@@ -494,6 +500,25 @@ dri2_open_driver(_EGLDisplay *disp)
}
EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp)
+{
+ struct dri2_egl_display *dri2_dpy = disp->DriverData;
+ const __DRIextension **extensions;
+
+ extensions = dri2_open_driver(disp);
+ if (!extensions)
+ return EGL_FALSE;
+
+ if (!dri2_bind_extensions(dri2_dpy, dri3_driver_extensions, extensions)) {
+ dlclose(dri2_dpy->driver);
+ return EGL_FALSE;
+ }
+ dri2_dpy->driver_extensions = extensions;
+
+ return EGL_TRUE;
+}
+
+EGLBoolean
dri2_load_driver(_EGLDisplay *disp)
{
struct dri2_egl_display *dri2_dpy = disp->DriverData;
@@ -550,7 +575,9 @@ dri2_setup_screen(_EGLDisplay *disp)
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
unsigned int api_mask;
- if (dri2_dpy->dri2) {
+ if (dri2_dpy->image_driver) {
+ api_mask = dri2_dpy->image_driver->getAPIMask(dri2_dpy->dri_screen);
+ } else if (dri2_dpy->dri2) {
api_mask = dri2_dpy->dri2->getAPIMask(dri2_dpy->dri_screen);
} else {
assert(dri2_dpy->swrast);
@@ -570,7 +597,7 @@ dri2_setup_screen(_EGLDisplay *disp)
if (api_mask & (1 << __DRI_API_GLES3))
disp->ClientAPIs |= EGL_OPENGL_ES3_BIT_KHR;
- assert(dri2_dpy->dri2 || dri2_dpy->swrast);
+ assert(dri2_dpy->image_driver || dri2_dpy->dri2 || dri2_dpy->swrast);
disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
disp->Extensions.MESA_configless_context = EGL_TRUE;
@@ -578,7 +605,8 @@ dri2_setup_screen(_EGLDisplay *disp)
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
- if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+ if (dri2_dpy->image_driver ||
+ (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
(dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
disp->Extensions.KHR_create_context = EGL_TRUE;
@@ -641,7 +669,14 @@ dri2_create_screen(_EGLDisplay *disp)
dri2_dpy = disp->DriverData;
- if (dri2_dpy->dri2) {
+ if (dri2_dpy->image_driver) {
+ dri2_dpy->dri_screen =
+ dri2_dpy->image_driver->createNewScreen2(0, dri2_dpy->fd,
+ dri2_dpy->extensions,
+ dri2_dpy->driver_extensions,
+ &dri2_dpy->driver_configs,
+ disp);
+ } else if (dri2_dpy->dri2) {
if (dri2_dpy->dri2->base.version >= 4) {
dri2_dpy->dri_screen =
dri2_dpy->dri2->createNewScreen2(0, dri2_dpy->fd,
@@ -677,7 +712,7 @@ dri2_create_screen(_EGLDisplay *disp)
extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);
- if (dri2_dpy->dri2) {
+ if (dri2_dpy->image_driver || dri2_dpy->dri2) {
if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
goto cleanup_dri_screen;
} else {
@@ -1024,7 +1059,26 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
else
dri_config = NULL;
- if (dri2_dpy->dri2) {
+ if (dri2_dpy->image_driver) {
+ unsigned error;
+ unsigned num_attribs = 8;
+ uint32_t ctx_attribs[8];
+
+ if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+ &num_attribs))
+ goto cleanup;
+
+ dri2_ctx->dri_context =
+ dri2_dpy->image_driver->createContextAttribs(dri2_dpy->dri_screen,
+ api,
+ dri_config,
+ shared,
+ num_attribs / 2,
+ ctx_attribs,
+ & error,
+ dri2_ctx);
+ dri2_create_context_attribs_error(error);
+ } else if (dri2_dpy->dri2) {
if (dri2_dpy->dri2->base.version >= 3) {
unsigned error;
unsigned num_attribs = 8;
@@ -1119,11 +1173,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
{
struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_surface *dri2_dsurf = dri2_egl_surface(dsurf);
- struct dri2_egl_surface *dri2_rsurf = dri2_egl_surface(rsurf);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
_EGLContext *old_ctx;
_EGLSurface *old_dsurf, *old_rsurf;
+ _EGLSurface *tmp_dsurf, *tmp_rsurf;
__DRIdrawable *ddraw, *rdraw;
__DRIcontext *cctx;
@@ -1135,8 +1188,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
if (old_ctx && dri2_drv->glFlush)
dri2_drv->glFlush();
- ddraw = (dri2_dsurf) ? dri2_dsurf->dri_drawable : NULL;
- rdraw = (dri2_rsurf) ? dri2_rsurf->dri_drawable : NULL;
+ ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
+ rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL;
if (old_ctx) {
@@ -1156,10 +1209,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
return EGL_TRUE;
} else {
/* undo the previous _eglBindContext */
- _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &dsurf, &rsurf);
+ _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, &tmp_rsurf);
assert(&dri2_ctx->base == ctx &&
- &dri2_dsurf->base == dsurf &&
- &dri2_rsurf->base == rsurf);
+ tmp_dsurf == dsurf &&
+ tmp_rsurf == rsurf);
_eglPutSurface(dsurf);
_eglPutSurface(rsurf);
@@ -1173,6 +1226,14 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
}
}
+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf)
+{
+ struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+
+ return dri2_surf->dri_drawable;
+}
+
/*
* Called from eglGetProcAddress() via drv->API.GetProcAddress().
*/
@@ -1235,7 +1296,7 @@ void
dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
+ __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(draw);
if (dri2_dpy->flush) {
if (dri2_dpy->flush->base.version >= 4) {
@@ -1253,12 +1314,12 @@ dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
* after calling eglSwapBuffers."
*/
dri2_dpy->flush->flush_with_flags(dri2_ctx->dri_context,
- dri2_surf->dri_drawable,
+ dri_drawable,
__DRI2_FLUSH_DRAWABLE |
__DRI2_FLUSH_INVALIDATE_ANCILLARY,
__DRI2_THROTTLE_SWAPBUFFER);
} else {
- dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+ dri2_dpy->flush->flush(dri_drawable);
}
}
}
@@ -1315,7 +1376,8 @@ static EGLBoolean
dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface);
+ _EGLSurface *surf = ctx->DrawSurface;
+ __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
(void) drv;
@@ -1323,7 +1385,7 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
* we need to copy fake to real here.*/
if (dri2_dpy->flush != NULL)
- dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+ dri2_dpy->flush->flush(dri_drawable);
return EGL_TRUE;
}
@@ -1346,10 +1408,10 @@ dri2_bind_tex_image(_EGLDriver *drv,
_EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
struct dri2_egl_context *dri2_ctx;
_EGLContext *ctx;
GLint format, target;
+ __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
ctx = _eglGetCurrentContext();
dri2_ctx = dri2_egl_context(ctx);
@@ -1357,7 +1419,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
if (!_eglBindTexImage(drv, disp, surf, buffer))
return EGL_FALSE;
- switch (dri2_surf->base.TextureFormat) {
+ switch (surf->TextureFormat) {
case EGL_TEXTURE_RGB:
format = __DRI_TEXTURE_FORMAT_RGB;
break;
@@ -1369,7 +1431,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
format = __DRI_TEXTURE_FORMAT_RGBA;
}
- switch (dri2_surf->base.TextureTarget) {
+ switch (surf->TextureTarget) {
case EGL_TEXTURE_2D:
target = GL_TEXTURE_2D;
break;
@@ -1380,7 +1442,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
(*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context,
target, format,
- dri2_surf->dri_drawable);
+ dri_drawable);
return EGL_TRUE;
}
@@ -1390,10 +1452,10 @@ dri2_release_tex_image(_EGLDriver *drv,
_EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
struct dri2_egl_context *dri2_ctx;
_EGLContext *ctx;
GLint target;
+ __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
ctx = _eglGetCurrentContext();
dri2_ctx = dri2_egl_context(ctx);
@@ -1401,7 +1463,7 @@ dri2_release_tex_image(_EGLDriver *drv,
if (!_eglReleaseTexImage(drv, disp, surf, buffer))
return EGL_FALSE;
- switch (dri2_surf->base.TextureTarget) {
+ switch (surf->TextureTarget) {
case EGL_TEXTURE_2D:
target = GL_TEXTURE_2D;
break;
@@ -1413,7 +1475,7 @@ dri2_release_tex_image(_EGLDriver *drv,
dri2_dpy->tex_buffer->releaseTexBuffer != NULL) {
(*dri2_dpy->tex_buffer->releaseTexBuffer)(dri2_ctx->dri_context,
target,
- dri2_surf->dri_drawable);
+ dri_drawable);
}
return EGL_TRUE;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 0e837b3eb8b..52ad92b182d 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -35,6 +35,10 @@
#include <xcb/dri2.h>
#include <xcb/xfixes.h>
#include <X11/Xlib-xcb.h>
+
+#ifdef HAVE_DRI3
+#include "loader_dri3_helper.h"
+#endif
#endif
#ifdef HAVE_WAYLAND_PLATFORM
@@ -145,6 +149,8 @@ struct dri2_egl_display_vtbl {
EGLBoolean (*get_sync_values)(_EGLDisplay *display, _EGLSurface *surface,
EGLuint64KHR *ust, EGLuint64KHR *msc,
EGLuint64KHR *sbc);
+
+ __DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf);
};
struct dri2_egl_display
@@ -158,6 +164,7 @@ struct dri2_egl_display
const __DRIconfig **driver_configs;
void *driver;
const __DRIcoreExtension *core;
+ const __DRIimageDriverExtension *image_driver;
const __DRIdri2Extension *dri2;
const __DRIswrastExtension *swrast;
const __DRI2flushExtension *flush;
@@ -190,6 +197,9 @@ struct dri2_egl_display
#ifdef HAVE_X11_PLATFORM
xcb_connection_t *conn;
int screen;
+#ifdef HAVE_DRI3
+ struct loader_dri3_extensions loader_dri3_ext;
+#endif
#endif
#ifdef HAVE_WAYLAND_PLATFORM
@@ -203,8 +213,9 @@ struct dri2_egl_display
int formats;
uint32_t capabilities;
int is_render_node;
- int is_different_gpu;
#endif
+
+ int is_different_gpu;
};
struct dri2_egl_context
@@ -325,8 +336,14 @@ EGLBoolean
dri2_load_driver_swrast(_EGLDisplay *disp);
EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp);
+
+EGLBoolean
dri2_create_screen(_EGLDisplay *disp);
+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf);
+
__DRIimage *
dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data);
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index 4abe82f63a0..8f3abcb9867 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -650,6 +650,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = {
.query_buffer_age = dri2_fallback_query_buffer_age,
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 815d2674cb2..3f4f7e78190 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -594,6 +594,7 @@ static struct dri2_egl_display_vtbl dri2_drm_display_vtbl = {
.query_buffer_age = dri2_drm_query_buffer_age,
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index a635c758da1..c2438f7509b 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1025,6 +1025,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {
.query_buffer_age = dri2_wl_query_buffer_age,
.create_wayland_buffer_from_image = dri2_wl_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
static EGLBoolean
@@ -1752,6 +1753,7 @@ static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
.query_buffer_age = dri2_fallback_query_buffer_age,
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
static EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 88a06a8c6a8..08cbf2d8393 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -45,6 +45,10 @@
#include "egl_dri2_fallbacks.h"
#include "loader.h"
+#ifdef HAVE_DRI3
+#include "platform_x11_dri3.h"
+#endif
+
static EGLBoolean
dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
EGLint interval);
@@ -703,7 +707,7 @@ dri2_x11_local_authenticate(_EGLDisplay *disp)
static EGLBoolean
dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
- _EGLDisplay *disp)
+ _EGLDisplay *disp, bool supports_preserved)
{
xcb_screen_iterator_t s;
xcb_depth_iterator_t d;
@@ -724,8 +728,10 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
surface_type =
EGL_WINDOW_BIT |
EGL_PIXMAP_BIT |
- EGL_PBUFFER_BIT |
- EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
+ EGL_PBUFFER_BIT;
+
+ if (supports_preserved)
+ surface_type |= EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
while (d.rem > 0) {
EGLBoolean class_added[6] = { 0, };
@@ -1112,6 +1118,7 @@ static struct dri2_egl_display_vtbl dri2_x11_swrast_display_vtbl = {
.query_buffer_age = dri2_fallback_query_buffer_age,
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
@@ -1130,6 +1137,7 @@ static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
.query_buffer_age = dri2_fallback_query_buffer_age,
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_x11_get_sync_values,
+ .get_dri_drawable = dri2_surface_get_dri_drawable,
};
static EGLBoolean
@@ -1179,7 +1187,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
if (!dri2_create_screen(disp))
goto cleanup_driver;
- if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
goto cleanup_configs;
/* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1250,6 +1258,100 @@ dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
}
}
+#ifdef HAVE_DRI3
+static EGLBoolean
+dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
+{
+ struct dri2_egl_display *dri2_dpy;
+
+ dri2_dpy = calloc(1, sizeof *dri2_dpy);
+ if (!dri2_dpy)
+ return _eglError(EGL_BAD_ALLOC, "eglInitialize");
+
+ disp->DriverData = (void *) dri2_dpy;
+ if (disp->PlatformDisplay == NULL) {
+ dri2_dpy->conn = xcb_connect(0, &dri2_dpy->screen);
+ dri2_dpy->own_device = true;
+ } else {
+ Display *dpy = disp->PlatformDisplay;
+
+ dri2_dpy->conn = XGetXCBConnection(dpy);
+ dri2_dpy->screen = DefaultScreen(dpy);
+ }
+
+ if (xcb_connection_has_error(dri2_dpy->conn)) {
+ _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed");
+ goto cleanup_dpy;
+ }
+
+ if (dri2_dpy->conn) {
+ if (!dri3_x11_connect(dri2_dpy))
+ goto cleanup_conn;
+ }
+
+ if (!dri2_load_driver_dri3(disp))
+ goto cleanup_conn;
+
+ dri2_dpy->extensions[0] = &dri3_image_loader_extension.base;
+ dri2_dpy->extensions[1] = &use_invalidate.base;
+ dri2_dpy->extensions[2] = &image_lookup_extension.base;
+ dri2_dpy->extensions[3] = NULL;
+
+ dri2_dpy->swap_available = true;
+ dri2_dpy->invalidate_available = true;
+
+ if (!dri2_create_screen(disp))
+ goto cleanup_fd;
+
+ dri2_x11_setup_swap_interval(dri2_dpy);
+
+ if (!dri2_dpy->is_different_gpu)
+ disp->Extensions.KHR_image_pixmap = EGL_TRUE;
+ disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE;
+ disp->Extensions.CHROMIUM_sync_control = EGL_TRUE;
+ disp->Extensions.EXT_buffer_age = EGL_TRUE;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+ disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
+#endif
+
+ if (dri2_dpy->conn) {
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
+ goto cleanup_configs;
+ }
+
+ dri2_dpy->loader_dri3_ext.core = dri2_dpy->core;
+ dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver;
+ dri2_dpy->loader_dri3_ext.flush = dri2_dpy->flush;
+ dri2_dpy->loader_dri3_ext.tex_buffer = dri2_dpy->tex_buffer;
+ dri2_dpy->loader_dri3_ext.image = dri2_dpy->image;
+ dri2_dpy->loader_dri3_ext.config = dri2_dpy->config;
+
+ /* Fill vtbl last to prevent accidentally calling virtual function during
+ * initialization.
+ */
+ dri2_dpy->vtbl = &dri3_x11_display_vtbl;
+
+ _eglLog(_EGL_INFO, "Using DRI3");
+
+ return EGL_TRUE;
+
+ cleanup_configs:
+ _eglCleanupDisplay(disp);
+ dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
+ dlclose(dri2_dpy->driver);
+ cleanup_fd:
+ close(dri2_dpy->fd);
+ cleanup_conn:
+ if (disp->PlatformDisplay == NULL)
+ xcb_disconnect(dri2_dpy->conn);
+ cleanup_dpy:
+ free(dri2_dpy);
+
+ return EGL_FALSE;
+}
+#endif
+
static EGLBoolean
dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
{
@@ -1321,7 +1423,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
#endif
- if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
goto cleanup_configs;
/* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1329,6 +1431,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
*/
dri2_dpy->vtbl = &dri2_x11_display_vtbl;
+ _eglLog(_EGL_INFO, "Using DRI2");
+
return EGL_TRUE;
cleanup_configs:
@@ -1355,9 +1459,16 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp)
int x11_dri2_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL);
if (x11_dri2_accel) {
- if (!dri2_initialize_x11_dri2(drv, disp)) {
- initialized = dri2_initialize_x11_swrast(drv, disp);
+#ifdef HAVE_DRI3
+ if (getenv("LIBGL_DRI3_DISABLE") != NULL ||
+ !dri2_initialize_x11_dri3(drv, disp)) {
+#endif
+ if (!dri2_initialize_x11_dri2(drv, disp)) {
+ initialized = dri2_initialize_x11_swrast(drv, disp);
+ }
+#ifdef HAVE_DRI3
}
+#endif
} else {
initialized = dri2_initialize_x11_swrast(drv, disp);
}
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
new file mode 100644
index 00000000000..8e4a131b11a
--- /dev/null
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <xf86drm.h>
+
+#include "egl_dri2.h"
+#include "egl_dri2_fallbacks.h"
+#include "platform_x11_dri3.h"
+
+#include "loader.h"
+#include "loader_dri3_helper.h"
+
+static struct dri3_egl_surface *
+loader_drawable_to_egl_surface(struct loader_dri3_drawable *draw) {
+ size_t offset = offsetof(struct dri3_egl_surface, loader_drawable);
+ return (struct dri3_egl_surface *)(((void*) draw) - offset);
+}
+
+static int
+egl_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+ return dri3_surf->base.SwapInterval;
+}
+
+static int
+egl_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+ if (interval > dri3_surf->base.Config->MaxSwapInterval)
+ interval = dri3_surf->base.Config->MaxSwapInterval;
+ else if (interval < dri3_surf->base.Config->MinSwapInterval)
+ interval = dri3_surf->base.Config->MinSwapInterval;
+
+ return interval;
+}
+
+static void
+egl_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+ dri3_surf->base.SwapInterval = interval;
+}
+
+static void
+egl_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
+ int width, int height)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+ dri3_surf->base.Width = width;
+ dri3_surf->base.Height = height;
+}
+
+static bool
+egl_dri3_in_current_context(struct loader_dri3_drawable *draw)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+ _EGLContext *ctx = _eglGetCurrentContext();
+
+ return ctx->Resource.Display == dri3_surf->base.Resource.Display;
+}
+
+static __DRIcontext *
+egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
+
+ return dri2_ctx->dri_context;
+}
+
+static void
+egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
+{
+ struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+ _EGLDisplay *disp = dri3_surf->base.Resource.Display;
+
+ dri2_flush_drawable_for_swapbuffers(disp, &dri3_surf->base);
+}
+
+static struct loader_dri3_vtable egl_dri3_vtable = {
+ .get_swap_interval = egl_dri3_get_swap_interval,
+ .clamp_swap_interval = egl_dri3_clamp_swap_interval,
+ .set_swap_interval = egl_dri3_set_swap_interval,
+ .set_drawable_size = egl_dri3_set_drawable_size,
+ .in_current_context = egl_dri3_in_current_context,
+ .get_dri_context = egl_dri3_get_dri_context,
+ .flush_drawable = egl_dri3_flush_drawable,
+ .show_fps = NULL,
+};
+
+static EGLBoolean
+dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+ (void) drv;
+
+ if (!_eglPutSurface(surf))
+ return EGL_TRUE;
+
+ loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
+
+ free(surf);
+
+ return EGL_TRUE;
+}
+
+static EGLBoolean
+dri3_set_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+ EGLint interval)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+ loader_dri3_set_swap_interval(&dri3_surf->loader_drawable, interval);
+
+ return EGL_TRUE;
+}
+
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+ for (; iter.rem; --screen, xcb_screen_next(&iter))
+ if (screen == 0)
+ return iter.data;
+
+ return NULL;
+}
+
+static _EGLSurface *
+dri3_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
+ _EGLConfig *conf, void *native_surface,
+ const EGLint *attrib_list)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
+ struct dri3_egl_surface *dri3_surf;
+ const __DRIconfig *dri_config;
+ xcb_drawable_t drawable;
+ xcb_screen_iterator_t s;
+ xcb_screen_t *screen;
+
+ STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
+ drawable = (uintptr_t) native_surface;
+
+ (void) drv;
+
+ dri3_surf = calloc(1, sizeof *dri3_surf);
+ if (!dri3_surf) {
+ _eglError(EGL_BAD_ALLOC, "dri3_create_surface");
+ return NULL;
+ }
+
+ if (!_eglInitSurface(&dri3_surf->base, disp, type, conf, attrib_list))
+ goto cleanup_surf;
+
+ if (type == EGL_PBUFFER_BIT) {
+ s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+ screen = get_xcb_screen(s, dri2_dpy->screen);
+ if (!screen) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_create_surface");
+ goto cleanup_surf;
+ }
+
+ drawable = xcb_generate_id(dri2_dpy->conn);
+ xcb_create_pixmap(dri2_dpy->conn, conf->BufferSize,
+ drawable, screen->root,
+ dri3_surf->base.Width, dri3_surf->base.Height);
+ }
+
+ dri_config = dri2_get_dri_config(dri2_conf, type,
+ dri3_surf->base.GLColorspace);
+
+ if (loader_dri3_drawable_init(dri2_dpy->conn, drawable,
+ dri2_dpy->dri_screen,
+ dri2_dpy->is_different_gpu, dri_config,
+ &dri2_dpy->loader_dri3_ext,
+ &egl_dri3_vtable,
+ &dri3_surf->loader_drawable)) {
+ _eglError(EGL_BAD_ALLOC, "dri3_surface_create");
+ goto cleanup_pixmap;
+ }
+
+ return &dri3_surf->base;
+
+ cleanup_pixmap:
+ if (type == EGL_PBUFFER_BIT)
+ xcb_free_pixmap(dri2_dpy->conn, drawable);
+ cleanup_surf:
+ free(dri3_surf);
+
+ return NULL;
+}
+
+/**
+ * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
+ */
+static _EGLSurface *
+dri3_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLConfig *conf, void *native_window,
+ const EGLint *attrib_list)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ _EGLSurface *surf;
+
+ surf = dri3_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
+ native_window, attrib_list);
+ if (surf != NULL)
+ dri3_set_swap_interval(drv, disp, surf, dri2_dpy->default_swap_interval);
+
+ return surf;
+}
+
+static _EGLSurface *
+dri3_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLConfig *conf, void *native_pixmap,
+ const EGLint *attrib_list)
+{
+ return dri3_create_surface(drv, disp, EGL_PIXMAP_BIT, conf,
+ native_pixmap, attrib_list);
+}
+
+static _EGLSurface *
+dri3_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLConfig *conf, const EGLint *attrib_list)
+{
+ return dri3_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
+ XCB_WINDOW_NONE, attrib_list);
+}
+
+static EGLBoolean
+dri3_get_sync_values(_EGLDisplay *display, _EGLSurface *surface,
+ EGLuint64KHR *ust, EGLuint64KHR *msc,
+ EGLuint64KHR *sbc)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surface);
+
+ return loader_dri3_wait_for_msc(&dri3_surf->loader_drawable, 0, 0, 0,
+ (int64_t *) ust, (int64_t *) msc,
+ (int64_t *) sbc) ? EGL_TRUE : EGL_FALSE;
+}
+
+static _EGLImage *
+dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
+ EGLClientBuffer buffer, const EGLint *attr_list)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ struct dri2_egl_image *dri2_img;
+ xcb_drawable_t drawable;
+ xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
+ xcb_dri3_buffer_from_pixmap_reply_t *bp_reply;
+ unsigned int format;
+
+ drawable = (xcb_drawable_t) (uintptr_t) buffer;
+ bp_cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, drawable);
+ bp_reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn,
+ bp_cookie, NULL);
+ if (!bp_reply) {
+ _eglError(EGL_BAD_ALLOC, "xcb_dri3_buffer_from_pixmap");
+ return NULL;
+ }
+
+ switch (bp_reply->depth) {
+ case 16:
+ format = __DRI_IMAGE_FORMAT_RGB565;
+ break;
+ case 24:
+ format = __DRI_IMAGE_FORMAT_XRGB8888;
+ break;
+ case 32:
+ format = __DRI_IMAGE_FORMAT_ARGB8888;
+ break;
+ default:
+ _eglError(EGL_BAD_PARAMETER,
+ "dri3_create_image_khr: unsupported pixmap depth");
+ free(bp_reply);
+ return EGL_NO_IMAGE_KHR;
+ }
+
+ dri2_img = malloc(sizeof *dri2_img);
+ if (!dri2_img) {
+ _eglError(EGL_BAD_ALLOC, "dri3_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
+
+ if (!_eglInitImage(&dri2_img->base, disp)) {
+ free(dri2_img);
+ return EGL_NO_IMAGE_KHR;
+ }
+
+ dri2_img->dri_image = loader_dri3_create_image(dri2_dpy->conn,
+ bp_reply,
+ format,
+ dri2_dpy->dri_screen,
+ dri2_dpy->image,
+ dri2_img);
+
+ free(bp_reply);
+
+ return &dri2_img->base;
+}
+
+static _EGLImage *
+dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLContext *ctx, EGLenum target,
+ EGLClientBuffer buffer, const EGLint *attr_list)
+{
+ (void) drv;
+
+ switch (target) {
+ case EGL_NATIVE_PIXMAP_KHR:
+ return dri3_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
+ default:
+ return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list);
+ }
+}
+
+/**
+ * Called by the driver when it needs to update the real front buffer with the
+ * contents of its fake front buffer.
+ */
+static void
+dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
+{
+ /* There does not seem to be any kind of consensus on whether we should
+ * support front-buffer rendering or not:
+ * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html
+ */
+ _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering.");
+ (void) driDrawable;
+ (void) loaderPrivate;
+}
+
+const __DRIimageLoaderExtension dri3_image_loader_extension = {
+ .base = { __DRI_IMAGE_LOADER, 1 },
+
+ .getBuffers = loader_dri3_get_buffers,
+ .flushFrontBuffer = dri3_flush_front_buffer,
+};
+
+static EGLBoolean
+dri3_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(draw);
+
+ /* No-op for a pixmap or pbuffer surface */
+ if (draw->Type == EGL_PIXMAP_BIT || draw->Type == EGL_PBUFFER_BIT)
+ return 0;
+
+ return loader_dri3_swap_buffers_msc(&dri3_surf->loader_drawable,
+ 0, 0, 0, 0,
+ draw->SwapBehavior == EGL_BUFFER_PRESERVED) != -1;
+}
+
+static EGLBoolean
+dri3_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+ void *native_pixmap_target)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+ xcb_pixmap_t target;
+
+ STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_pixmap_target));
+ target = (uintptr_t) native_pixmap_target;
+
+ loader_dri3_copy_drawable(&dri3_surf->loader_drawable, target,
+ dri3_surf->loader_drawable.drawable);
+
+ return EGL_TRUE;
+}
+
+static int
+dri3_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+ return loader_dri3_query_buffer_age(&dri3_surf->loader_drawable);
+}
+
+static __DRIdrawable *
+dri3_get_dri_drawable(_EGLSurface *surf)
+{
+ struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+ return dri3_surf->loader_drawable.dri_drawable;
+}
+
+struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
+ .authenticate = NULL,
+ .create_window_surface = dri3_create_window_surface,
+ .create_pixmap_surface = dri3_create_pixmap_surface,
+ .create_pbuffer_surface = dri3_create_pbuffer_surface,
+ .destroy_surface = dri3_destroy_surface,
+ .create_image = dri3_create_image_khr,
+ .swap_interval = dri3_set_swap_interval,
+ .swap_buffers = dri3_swap_buffers,
+ .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
+ .swap_buffers_region = dri2_fallback_swap_buffers_region,
+ .post_sub_buffer = dri2_fallback_post_sub_buffer,
+ .copy_buffers = dri3_copy_buffers,
+ .query_buffer_age = dri3_query_buffer_age,
+ .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
+ .get_sync_values = dri3_get_sync_values,
+ .get_dri_drawable = dri3_get_dri_drawable,
+};
+
+static char *
+dri3_get_device_name(int fd)
+{
+ char *ret = NULL;
+
+ ret = drmGetRenderDeviceNameFromFd(fd);
+ if (ret)
+ return ret;
+
+ /* For dri3, render node support is required for WL_bind_wayland_display.
+ * In order not to regress on older systems without kernel or libdrm
+ * support, fall back to dri2. User can override it with environment
+ * variable if they don't need to use that extension.
+ */
+ if (getenv("EGL_FORCE_DRI3") == NULL) {
+ _eglLog(_EGL_WARNING, "Render node support not available, falling back to dri2");
+ _eglLog(_EGL_WARNING, "If you want to force dri3, set EGL_FORCE_DRI3 environment variable");
+ } else
+ ret = loader_get_device_name_for_fd(fd);
+
+ return ret;
+}
+
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
+{
+ xcb_dri3_query_version_reply_t *dri3_query;
+ xcb_dri3_query_version_cookie_t dri3_query_cookie;
+ xcb_present_query_version_reply_t *present_query;
+ xcb_present_query_version_cookie_t present_query_cookie;
+ xcb_generic_error_t *error;
+ xcb_screen_iterator_t s;
+ xcb_screen_t *screen;
+ const xcb_query_extension_reply_t *extension;
+
+ xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_dri3_id);
+ xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_present_id);
+
+ extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_dri3_id);
+ if (!(extension && extension->present))
+ return EGL_FALSE;
+
+ extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_present_id);
+ if (!(extension && extension->present))
+ return EGL_FALSE;
+
+ dri3_query_cookie = xcb_dri3_query_version(dri2_dpy->conn,
+ XCB_DRI3_MAJOR_VERSION,
+ XCB_DRI3_MINOR_VERSION);
+
+ present_query_cookie = xcb_present_query_version(dri2_dpy->conn,
+ XCB_PRESENT_MAJOR_VERSION,
+ XCB_PRESENT_MINOR_VERSION);
+
+ dri3_query =
+ xcb_dri3_query_version_reply(dri2_dpy->conn, dri3_query_cookie, &error);
+ if (dri3_query == NULL || error != NULL) {
+ _eglLog(_EGL_WARNING, "DRI3: failed to query the version");
+ free(dri3_query);
+ free(error);
+ return EGL_FALSE;
+ }
+ free(dri3_query);
+
+ present_query =
+ xcb_present_query_version_reply(dri2_dpy->conn,
+ present_query_cookie, &error);
+ if (present_query == NULL || error != NULL) {
+ _eglLog(_EGL_WARNING, "DRI3: failed to query Present version");
+ free(present_query);
+ free(error);
+ return EGL_FALSE;
+ }
+ free(present_query);
+
+ s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+ screen = get_xcb_screen(s, dri2_dpy->screen);
+ if (!screen) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_x11_connect");
+ return EGL_FALSE;
+ }
+
+ dri2_dpy->fd = loader_dri3_open(dri2_dpy->conn, screen->root, 0);
+ if (dri2_dpy->fd < 0) {
+ int conn_error = xcb_connection_has_error(dri2_dpy->conn);
+ _eglLog(_EGL_WARNING, "DRI3: Screen seems not DRI3 capable");
+
+ if (conn_error)
+ _eglLog(_EGL_WARNING, "DRI3: Failed to initialize");
+
+ return EGL_FALSE;
+ }
+
+ dri2_dpy->fd = loader_get_user_preferred_fd(dri2_dpy->fd, &dri2_dpy->is_different_gpu);
+
+ dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+ if (!dri2_dpy->driver_name) {
+ _eglLog(_EGL_WARNING, "DRI3: No driver found");
+ close(dri2_dpy->fd);
+ return EGL_FALSE;
+ }
+
+ dri2_dpy->device_name = dri3_get_device_name(dri2_dpy->fd);
+ if (!dri2_dpy->device_name) {
+ close(dri2_dpy->fd);
+ return EGL_FALSE;
+ }
+
+ return EGL_TRUE;
+}
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.h b/src/egl/drivers/dri2/platform_x11_dri3.h
new file mode 100644
index 00000000000..13d85724288
--- /dev/null
+++ b/src/egl/drivers/dri2/platform_x11_dri3.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef EGL_X11_DRI3_INCLUDED
+#define EGL_X11_DRI3_INCLUDED
+
+#include "egl_dri2.h"
+
+_EGL_DRIVER_TYPECAST(dri3_egl_surface, _EGLSurface, obj)
+
+struct dri3_egl_surface {
+ _EGLSurface base;
+ struct loader_dri3_drawable loader_drawable;
+};
+
+extern const __DRIimageLoaderExtension dri3_image_loader_extension;
+extern struct dri2_egl_display_vtbl dri3_x11_display_vtbl;
+
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy);
+
+#endif
diff --git a/src/egl/egl-symbols-check b/src/egl/egl-symbols-check
new file mode 100755
index 00000000000..5d46fed57c9
--- /dev/null
+++ b/src/egl/egl-symbols-check
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+FUNCS=$(nm -D --defined-only ${1-.libs/libEGL.so} | grep -o "T .*" | cut -c 3- | while read func; do
+( grep -q "^$func$" || echo $func ) <<EOF
+eglBindAPI
+eglBindTexImage
+eglChooseConfig
+eglClientWaitSync
+eglCopyBuffers
+eglCreateContext
+eglCreateImage
+eglCreatePbufferFromClientBuffer
+eglCreatePbufferSurface
+eglCreatePixmapSurface
+eglCreatePlatformPixmapSurface
+eglCreatePlatformWindowSurface
+eglCreateSync
+eglCreateWindowSurface
+eglDestroyContext
+eglDestroyImage
+eglDestroySurface
+eglDestroySync
+eglGetConfigAttrib
+eglGetConfigs
+eglGetCurrentContext
+eglGetCurrentDisplay
+eglGetCurrentSurface
+eglGetDisplay
+eglGetError
+eglGetPlatformDisplay
+eglGetProcAddress
+eglGetSyncAttrib
+eglInitialize
+eglMakeCurrent
+eglQueryAPI
+eglQueryContext
+eglQueryString
+eglQuerySurface
+eglReleaseTexImage
+eglReleaseThread
+eglSurfaceAttrib
+eglSwapBuffers
+eglSwapInterval
+eglTerminate
+eglWaitClient
+eglWaitGL
+eglWaitNative
+eglWaitSync
+_fini
+_init
+EOF
+done)
+
+test ! -n "$FUNCS" || echo $FUNCS
+test ! -n "$FUNCS"
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 39e064e9538..b406d4a5480 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir)
GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
SUBDIRS := auxiliary
+SUBDIRS += auxiliary/pipe-loader
#
# Gallium drivers and their respective winsys
diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index ee07ab6c8f9..6fe2e22fecf 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -67,3 +67,9 @@ if HAVE_DRISW
GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
endif
+
+if HAVE_DRISW_KMS
+GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
+ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+ $(LIBDRM_LIBS)
+endif
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 611d55fafe2..e42a8f17703 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS =
##
SUBDIRS += auxiliary
+SUBDIRS += auxiliary/pipe-loader
##
## Gallium pipe drivers and their respective winsys'
@@ -98,7 +99,7 @@ if HAVE_DRISW
SUBDIRS += winsys/sw/dri
endif
-if HAVE_DRI2
+if HAVE_DRISW_KMS
SUBDIRS += winsys/sw/kms-dri
endif
@@ -120,7 +121,8 @@ EXTRA_DIST = \
## Gallium state trackers and their users (targets)
##
-if HAVE_LOADER_GALLIUM
+## XXX: Rename the conditional once we have a config switch for static/dynamic pipe-drivers
+if HAVE_CLOVER
SUBDIRS += targets/pipe-loader
endif
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index fa5fa6e8734..0c3a3742c16 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -5,6 +5,7 @@ Import('env')
#
SConscript('auxiliary/SConscript')
+SConscript('auxiliary/pipe-loader/SConscript')
#
# Drivers
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index a728162bd9d..ee296ceda33 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,7 +1,3 @@
-if HAVE_LOADER_GALLIUM
-SUBDIRS := pipe-loader
-endif
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
@@ -66,15 +62,7 @@ COMMON_VL_CFLAGS = \
$(AM_CFLAGS) \
$(VL_CFLAGS) \
$(DRI2PROTO_CFLAGS) \
- $(LIBDRM_CFLAGS) \
- $(GALLIUM_PIPE_LOADER_DEFINES) \
- -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
-
-if HAVE_GALLIUM_STATIC_TARGETS
-COMMON_VL_CFLAGS += \
- -DGALLIUM_STATIC_TARGETS=1
-
-endif # HAVE_GALLIUM_STATIC_TARGETS
+ $(LIBDRM_CFLAGS)
noinst_LTLIBRARIES += libgalliumvl.la
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 82ef5ecfce4..61601920a94 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -219,8 +219,6 @@ C_SOURCES := \
util/u_format.h \
util/u_format_etc.c \
util/u_format_etc.h \
- util/u_format_fake.c \
- util/u_format_fake.h \
util/u_format_latc.c \
util/u_format_latc.h \
util/u_format_other.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 7bda1184ee9..3ee708f4fad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
#if defined(PIPE_ARCH_PPC)
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#if HAVE_LLVM >= 0x0304
+ /*
+ * Make sure VSX instructions are disabled
+ * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
+ */
+ if (util_cpu_caps.has_altivec) {
+ MAttrs.push_back("-vsx");
+ }
+#endif
#endif
builder.setMAttrs(MAttrs);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 7d2cd9a9e73..28c7a86316e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2608,7 +2608,12 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
params.type = bld->bld_base.base.type;
params.sample_key = sample_key;
params.texture_index = unit;
- params.sampler_index = unit;
+ /*
+ * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
+ * and trigger some assertions with d3d10 where the sampler view number
+ * can exceed this.
+ */
+ params.sampler_index = 0;
params.context_ptr = bld->context_ptr;
params.thread_data_ptr = bld->thread_data_ptr;
params.coords = coords;
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8fa79..efceb85e38d 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -33,6 +33,7 @@
* Set GALLIUM_HUD=help for more info.
*/
+#include <signal.h>
#include <stdio.h>
#include "hud/hud_context.h"
@@ -51,12 +52,15 @@
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_dump.h"
+/* Control the visibility of all HUD contexts */
+static boolean huds_visible = TRUE;
struct hud_context {
struct pipe_context *pipe;
struct cso_context *cso;
struct u_upload_mgr *uploader;
+ struct hud_batch_query_context *batch_query;
struct list_head pane_list;
/* states */
@@ -95,6 +99,13 @@ struct hud_context {
} text, bg, whitelines;
};
+#ifdef PIPE_OS_UNIX
+static void
+signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
+{
+ huds_visible = !huds_visible;
+}
+#endif
static void
hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
@@ -441,6 +452,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
struct hud_pane *pane;
struct hud_graph *gr;
+ if (!huds_visible)
+ return;
+
hud->fb_width = tex->width0;
hud->fb_height = tex->height0;
hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
@@ -510,6 +524,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
/* prepare all graphs */
+ hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
gr->query_new_value(gr);
@@ -903,17 +919,21 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
}
else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+ "samples-passed",
PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+ 0);
}
else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+ "primitives-generated",
PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+ 0);
}
else {
boolean processed = FALSE;
@@ -938,17 +958,19 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
break;
if (i < Elements(pipeline_statistics_names)) {
- hud_pipe_query_install(pane, hud->pipe, name,
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
PIPE_QUERY_PIPELINE_STATISTICS, i,
0, PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+ 0);
processed = TRUE;
}
}
/* driver queries */
if (!processed) {
- if (!hud_driver_query_install(pane, hud->pipe, name)){
+ if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+ name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", name);
}
}
@@ -1125,6 +1147,12 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
struct pipe_sampler_view view_templ;
unsigned i;
const char *env = debug_get_option("GALLIUM_HUD", NULL);
+ unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0);
+#ifdef PIPE_OS_UNIX
+ static boolean sig_handled = FALSE;
+ struct sigaction action = {};
+#endif
+ huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE);
if (!env || !*env)
return NULL;
@@ -1267,6 +1295,22 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
LIST_INITHEAD(&hud->pane_list);
+ /* setup sig handler once for all hud contexts */
+#ifdef PIPE_OS_UNIX
+ if (!sig_handled && signo != 0) {
+ action.sa_sigaction = &signal_visible_handler;
+ action.sa_flags = SA_SIGINFO;
+
+ if (signo >= NSIG)
+ fprintf(stderr, "gallium_hud: invalid signal %u\n", signo);
+ else if (sigaction(signo, &action, NULL) < 0)
+ fprintf(stderr, "gallium_hud: unable to set handler for signal %u\n", signo);
+ fflush(stderr);
+
+ sig_handled = TRUE;
+ }
+#endif
+
hud_parse_env_var(hud, env);
return hud;
}
@@ -1287,6 +1331,7 @@ hud_destroy(struct hud_context *hud)
FREE(pane);
}
+ hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305ea835..d7b1f11ed56 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,164 @@
#include "hud/hud_private.h"
#include "pipe/p_screen.h"
#include "os/os_time.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include <stdio.h>
+// Must be a power of two
#define NUM_QUERIES 8
+struct hud_batch_query_context {
+ struct pipe_context *pipe;
+ unsigned num_query_types;
+ unsigned allocated_query_types;
+ unsigned *query_types;
+
+ boolean failed;
+ struct pipe_query *query[NUM_QUERIES];
+ union pipe_query_result *result[NUM_QUERIES];
+ unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+ struct pipe_context *pipe;
+
+ if (!bq || bq->failed)
+ return;
+
+ pipe = bq->pipe;
+
+ if (bq->query[bq->head])
+ pipe->end_query(pipe, bq->query[bq->head]);
+
+ bq->results = 0;
+
+ while (bq->pending) {
+ unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES;
+ struct pipe_query *query = bq->query[idx];
+
+ if (!bq->result[idx])
+ bq->result[idx] = MALLOC(sizeof(bq->result[idx]->batch[0]) *
+ bq->num_query_types);
+ if (!bq->result[idx]) {
+ fprintf(stderr, "gallium_hud: out of memory.\n");
+ bq->failed = TRUE;
+ return;
+ }
+
+ if (!pipe->get_query_result(pipe, query, FALSE, bq->result[idx]))
+ break;
+
+ ++bq->results;
+ --bq->pending;
+ }
+
+ bq->head = (bq->head + 1) % NUM_QUERIES;
+
+ if (bq->pending == NUM_QUERIES) {
+ fprintf(stderr,
+ "gallium_hud: all queries busy after %i frames, dropping data.\n",
+ NUM_QUERIES);
+
+ assert(bq->query[bq->head]);
+
+ pipe->destroy_query(bq->pipe, bq->query[bq->head]);
+ bq->query[bq->head] = NULL;
+ }
+
+ ++bq->pending;
+
+ if (!bq->query[bq->head]) {
+ bq->query[bq->head] = pipe->create_batch_query(pipe,
+ bq->num_query_types,
+ bq->query_types);
+
+ if (!bq->query[bq->head]) {
+ fprintf(stderr,
+ "gallium_hud: create_batch_query failed. You may have "
+ "selected too many or incompatible queries.\n");
+ bq->failed = TRUE;
+ return;
+ }
+ }
+
+ if (!pipe->begin_query(pipe, bq->query[bq->head])) {
+ fprintf(stderr,
+ "gallium_hud: could not begin batch query. You may have "
+ "selected too many or incompatible queries.\n");
+ bq->failed = TRUE;
+ }
+}
+
+static boolean
+batch_query_add(struct hud_batch_query_context **pbq,
+ struct pipe_context *pipe, unsigned query_type,
+ unsigned *result_index)
+{
+ struct hud_batch_query_context *bq = *pbq;
+ unsigned i;
+
+ if (!bq) {
+ bq = CALLOC_STRUCT(hud_batch_query_context);
+ if (!bq)
+ return false;
+ bq->pipe = pipe;
+ *pbq = bq;
+ }
+
+ for (i = 0; i < bq->num_query_types; ++i) {
+ if (bq->query_types[i] == query_type) {
+ *result_index = i;
+ return true;
+ }
+ }
+
+ if (bq->num_query_types == bq->allocated_query_types) {
+ unsigned new_alloc = MAX2(16, bq->allocated_query_types * 2);
+ unsigned *new_query_types
+ = REALLOC(bq->query_types,
+ bq->allocated_query_types * sizeof(unsigned),
+ new_alloc * sizeof(unsigned));
+ if (!new_query_types)
+ return false;
+ bq->query_types = new_query_types;
+ bq->allocated_query_types = new_alloc;
+ }
+
+ bq->query_types[bq->num_query_types] = query_type;
+ *result_index = bq->num_query_types++;
+ return true;
+}
+
+void
+hud_batch_query_cleanup(struct hud_batch_query_context **pbq)
+{
+ struct hud_batch_query_context *bq = *pbq;
+ unsigned idx;
+
+ if (!bq)
+ return;
+
+ *pbq = NULL;
+
+ if (bq->query[bq->head] && !bq->failed)
+ bq->pipe->end_query(bq->pipe, bq->query[bq->head]);
+
+ for (idx = 0; idx < NUM_QUERIES; ++idx) {
+ if (bq->query[idx])
+ bq->pipe->destroy_query(bq->pipe, bq->query[idx]);
+ FREE(bq->result[idx]);
+ }
+
+ FREE(bq->query_types);
+ FREE(bq);
+}
+
struct query_info {
struct pipe_context *pipe;
+ struct hud_batch_query_context *batch;
unsigned query_type;
unsigned result_index; /* unit depends on query_type */
enum pipe_driver_query_result_type result_type;
@@ -48,7 +199,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
- unsigned num_queries;
uint64_t last_time;
uint64_t results_cumulative;
@@ -56,11 +206,26 @@ struct query_info {
};
static void
-query_new_value(struct hud_graph *gr)
+query_new_value_batch(struct query_info *info)
+{
+ struct hud_batch_query_context *bq = info->batch;
+ unsigned result_index = info->result_index;
+ unsigned idx = (bq->head - bq->pending) % NUM_QUERIES;
+ unsigned results = bq->results;
+
+ while (results) {
+ info->results_cumulative += bq->result[idx]->batch[result_index].u64;
+ ++info->num_results;
+
+ --results;
+ idx = (idx - 1) % NUM_QUERIES;
+ }
+}
+
+static void
+query_new_value_normal(struct query_info *info)
{
- struct query_info *info = gr->query_data;
struct pipe_context *pipe = info->pipe;
- uint64_t now = os_time_get();
if (info->last_time) {
if (info->query[info->head])
@@ -107,30 +272,9 @@ query_new_value(struct hud_graph *gr)
break;
}
}
-
- if (info->num_results && info->last_time + gr->pane->period <= now) {
- uint64_t value;
-
- switch (info->result_type) {
- default:
- case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
- value = info->results_cumulative / info->num_results;
- break;
- case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
- value = info->results_cumulative;
- break;
- }
-
- hud_graph_add_value(gr, value);
-
- info->last_time = now;
- info->results_cumulative = 0;
- info->num_results = 0;
- }
}
else {
/* initialize */
- info->last_time = now;
info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
}
@@ -139,11 +283,49 @@ query_new_value(struct hud_graph *gr)
}
static void
+query_new_value(struct hud_graph *gr)
+{
+ struct query_info *info = gr->query_data;
+ uint64_t now = os_time_get();
+
+ if (info->batch) {
+ query_new_value_batch(info);
+ } else {
+ query_new_value_normal(info);
+ }
+
+ if (!info->last_time) {
+ info->last_time = now;
+ return;
+ }
+
+ if (info->num_results && info->last_time + gr->pane->period <= now) {
+ uint64_t value;
+
+ switch (info->result_type) {
+ default:
+ case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+ value = info->results_cumulative / info->num_results;
+ break;
+ case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+ value = info->results_cumulative;
+ break;
+ }
+
+ hud_graph_add_value(gr, value);
+
+ info->last_time = now;
+ info->results_cumulative = 0;
+ info->num_results = 0;
+ }
+}
+
+static void
free_query_info(void *ptr)
{
struct query_info *info = ptr;
- if (info->last_time) {
+ if (!info->batch && info->last_time) {
struct pipe_context *pipe = info->pipe;
int i;
@@ -159,11 +341,13 @@ free_query_info(void *ptr)
}
void
-hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_pipe_query_install(struct hud_batch_query_context **pbq,
+ struct hud_pane *pane, struct pipe_context *pipe,
const char *name, unsigned query_type,
unsigned result_index,
uint64_t max_value, enum pipe_driver_query_type type,
- enum pipe_driver_query_result_type result_type)
+ enum pipe_driver_query_result_type result_type,
+ unsigned flags)
{
struct hud_graph *gr;
struct query_info *info;
@@ -175,28 +359,40 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
strncpy(gr->name, name, sizeof(gr->name));
gr->name[sizeof(gr->name) - 1] = '\0';
gr->query_data = CALLOC_STRUCT(query_info);
- if (!gr->query_data) {
- FREE(gr);
- return;
- }
+ if (!gr->query_data)
+ goto fail_gr;
gr->query_new_value = query_new_value;
gr->free_query_data = free_query_info;
info = gr->query_data;
info->pipe = pipe;
- info->query_type = query_type;
- info->result_index = result_index;
info->result_type = result_type;
+ if (flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+ if (!batch_query_add(pbq, pipe, query_type, &info->result_index))
+ goto fail_info;
+ info->batch = *pbq;
+ } else {
+ info->query_type = query_type;
+ info->result_index = result_index;
+ }
+
hud_pane_add_graph(pane, gr);
if (pane->max_value < max_value)
hud_pane_set_max_value(pane, max_value);
pane->type = type;
+ return;
+
+fail_info:
+ FREE(info);
+fail_gr:
+ FREE(gr);
}
boolean
-hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_driver_query_install(struct hud_batch_query_context **pbq,
+ struct hud_pane *pane, struct pipe_context *pipe,
const char *name)
{
struct pipe_screen *screen = pipe->screen;
@@ -220,8 +416,9 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
if (!found)
return FALSE;
- hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
- query.max_value.u64, query.type, query.result_type);
+ hud_pipe_query_install(pbq, pane, pipe, query.name, query.query_type, 0,
+ query.max_value.u64, query.type, query.result_type,
+ query.flags);
return TRUE;
}
diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h
index 01caf7b8b2c..4a788bba456 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -80,19 +80,26 @@ void hud_pane_set_max_value(struct hud_pane *pane, uint64_t value);
void hud_graph_add_value(struct hud_graph *gr, uint64_t value);
/* graphs/queries */
+struct hud_batch_query_context;
+
#define ALL_CPUS ~0 /* optionally set as cpu_index */
int hud_get_num_cpus(void);
void hud_fps_graph_install(struct hud_pane *pane);
void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
-void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+void hud_pipe_query_install(struct hud_batch_query_context **pbq,
+ struct hud_pane *pane, struct pipe_context *pipe,
const char *name, unsigned query_type,
unsigned result_index,
uint64_t max_value,
enum pipe_driver_query_type type,
- enum pipe_driver_query_result_type result_type);
-boolean hud_driver_query_install(struct hud_pane *pane,
+ enum pipe_driver_query_result_type result_type,
+ unsigned flags);
+boolean hud_driver_query_install(struct hud_batch_query_context **pbq,
+ struct hud_pane *pane,
struct pipe_context *pipe, const char *name);
+void hud_batch_query_update(struct hud_batch_query_context *bq);
+void hud_batch_query_cleanup(struct hud_batch_query_context **pbq);
#endif
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 0539cfc16a1..86c2ffadbc8 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -295,7 +295,7 @@ ttn_emit_declaration(struct ttn_compile *c)
type = nir_type_int;
break;
case TGSI_RETURN_TYPE_UINT:
- type = nir_type_unsigned;
+ type = nir_type_uint;
break;
case TGSI_RETURN_TYPE_FLOAT:
default:
@@ -1239,6 +1239,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
op = nir_texop_tex;
num_srcs = 1;
break;
+ case TGSI_OPCODE_TEX2:
+ op = nir_texop_tex;
+ num_srcs = 1;
+ samp = 2;
+ break;
case TGSI_OPCODE_TXP:
op = nir_texop_tex;
num_srcs = 2;
@@ -1275,6 +1280,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
num_srcs = 3;
samp = 3;
break;
+ case TGSI_OPCODE_LODQ:
+ op = nir_texop_lod;
+ num_srcs = 1;
+ break;
default:
fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
@@ -1327,7 +1336,9 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
*/
sview = instr->sampler_index;
- if (sview < c->num_samp_types) {
+ if (op == nir_texop_lod) {
+ instr->dest_type = nir_type_float;
+ } else if (sview < c->num_samp_types) {
instr->dest_type = c->samp_types[sview];
} else {
instr->dest_type = nir_type_float;
@@ -1394,10 +1405,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
}
if (instr->is_shadow) {
- if (instr->coord_components < 3)
- instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
- else
+ if (instr->coord_components == 4)
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+ else if (instr->coord_components == 3)
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
+ else
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
instr->src[src_number].src_type = nir_tex_src_comparitor;
src_number++;
@@ -1641,7 +1654,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
[TGSI_OPCODE_TG4] = 0,
- [TGSI_OPCODE_LODQ] = 0, /* XXX */
+ [TGSI_OPCODE_LODQ] = 0,
[TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
[TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
@@ -1650,7 +1663,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_POPC] = nir_op_bit_count,
[TGSI_OPCODE_LSB] = nir_op_find_lsb,
[TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
- [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */
+ [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
[TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
[TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
@@ -1803,11 +1816,13 @@ ttn_emit_instruction(struct ttn_compile *c)
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXQ_LZ:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TG4:
+ case TGSI_OPCODE_LODQ:
ttn_tex(c, dest, src);
break;
diff --git a/src/gallium/auxiliary/os/os_process.c b/src/gallium/auxiliary/os/os_process.c
index a6262283d87..d2dcd0d7fbc 100644
--- a/src/gallium/auxiliary/os/os_process.c
+++ b/src/gallium/auxiliary/os/os_process.c
@@ -54,37 +54,48 @@ boolean
os_get_process_name(char *procname, size_t size)
{
const char *name;
+
+ /* First, check if the GALLIUM_PROCESS_NAME env var is set to
+ * override the normal process name query.
+ */
+ name = os_get_option("GALLIUM_PROCESS_NAME");
+
+ if (!name) {
+ /* do normal query */
+
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
- char szProcessPath[MAX_PATH];
- char *lpProcessName;
- char *lpProcessExt;
+ char szProcessPath[MAX_PATH];
+ char *lpProcessName;
+ char *lpProcessExt;
- GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
+ GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
- lpProcessName = strrchr(szProcessPath, '\\');
- lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
+ lpProcessName = strrchr(szProcessPath, '\\');
+ lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
- lpProcessExt = strrchr(lpProcessName, '.');
- if (lpProcessExt) {
- *lpProcessExt = '\0';
- }
+ lpProcessExt = strrchr(lpProcessName, '.');
+ if (lpProcessExt) {
+ *lpProcessExt = '\0';
+ }
- name = lpProcessName;
+ name = lpProcessName;
#elif defined(__GLIBC__) || defined(__CYGWIN__)
- name = program_invocation_short_name;
+ name = program_invocation_short_name;
#elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE)
- /* *BSD and OS X */
- name = getprogname();
+ /* *BSD and OS X */
+ name = getprogname();
#elif defined(PIPE_OS_HAIKU)
- image_info info;
- get_image_info(B_CURRENT_TEAM, &info);
- name = info.name;
+ image_info info;
+ get_image_info(B_CURRENT_TEAM, &info);
+ name = info.name;
#else
#warning unexpected platform in os_process.c
- return FALSE;
+ return FALSE;
#endif
+ }
+
assert(size > 0);
assert(procname);
diff --git a/src/gallium/auxiliary/pipe-loader/Android.mk b/src/gallium/auxiliary/pipe-loader/Android.mk
new file mode 100644
index 00000000000..27893137a1a
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/Android.mk
@@ -0,0 +1,49 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Emil Velikov <[email protected]>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# NOTE: Currently we build only a 'static' pipe-loader
+LOCAL_PATH := $(call my-dir)
+
+# get COMMON_SOURCES and DRM_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_CFLAGS := \
+ -DHAVE_PIPE_LOADER_DRI \
+ -DDROP_PIPE_LOADER_MISC \
+ -DGALLIUM_STATIC_TARGETS
+
+LOCAL_SRC_FILES := $(COMMON_SOURCES)
+
+LOCAL_MODULE := libmesa_pipe_loader
+
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
+LOCAL_SRC_FILES += $(DRM_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_STATIC_LIBRARIES := libmesa_loader
+endif
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 8c837996539..8039a957b1b 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -9,20 +9,40 @@ AM_CFLAGS = \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS)
-noinst_LTLIBRARIES = libpipe_loader.la
+noinst_LTLIBRARIES = \
+ libpipe_loader_static.la \
+ libpipe_loader_dynamic.la
-libpipe_loader_la_SOURCES = \
+libpipe_loader_static_la_CFLAGS = \
+ $(AM_CFLAGS) \
+ -DGALLIUM_STATIC_TARGETS=1
+
+libpipe_loader_dynamic_la_CFLAGS = \
+ $(AM_CFLAGS) \
+ -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
+
+libpipe_loader_static_la_SOURCES = \
$(COMMON_SOURCES)
-if HAVE_DRM_LOADER_GALLIUM
+libpipe_loader_dynamic_la_SOURCES = \
+ $(COMMON_SOURCES)
+
+if HAVE_LIBDRM
AM_CFLAGS += \
$(LIBDRM_CFLAGS)
-libpipe_loader_la_SOURCES += \
+libpipe_loader_static_la_SOURCES += \
$(DRM_SOURCES)
-libpipe_loader_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+libpipe_loader_dynamic_la_SOURCES += \
+ $(DRM_SOURCES)
endif
+libpipe_loader_static_la_LIBADD = \
+ $(top_builddir)/src/loader/libloader.la
+
+libpipe_loader_dynamic_la_LIBADD = \
+ $(top_builddir)/src/loader/libloader.la
+
+EXTRA_DIST = SConscript
diff --git a/src/gallium/auxiliary/pipe-loader/SConscript b/src/gallium/auxiliary/pipe-loader/SConscript
new file mode 100644
index 00000000000..c611fb892f8
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/SConscript
@@ -0,0 +1,33 @@
+Import('*')
+
+env = env.Clone()
+
+env.MSVC2008Compat()
+
+env.Append(CPPPATH = [
+ '#/src/loader',
+ '#/src/gallium/winsys',
+])
+
+env.Append(CPPDEFINES = [
+ ('HAVE_PIPE_LOADER_DRI', '1'),
+ ('DROP_PIPE_LOADER_MISC', '1'),
+ ('GALLIUM_STATIC_TARGETS', '1'),
+])
+
+source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES')
+
+if env['HAVE_DRM']:
+ source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES')
+
+ env.PkgUseModules('DRM')
+ env.Append(LIBS = [libloader])
+
+pipe_loader = env.ConvenienceLibrary(
+ target = 'pipe_loader',
+ source = source,
+)
+
+env.Alias('pipe_loader', pipe_loader)
+
+Export('pipe_loader')
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
index 8e79f853b0a..aef996c4617 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -32,10 +32,15 @@
#include "util/u_string.h"
#include "util/u_dl.h"
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define PATH_MAX _MAX_PATH
+#endif
+
#define MODULE_PREFIX "pipe_"
static int (*backends[])(struct pipe_loader_device **, int) = {
-#ifdef HAVE_PIPE_LOADER_DRM
+#ifdef HAVE_LIBDRM
&pipe_loader_drm_probe,
#endif
&pipe_loader_sw_probe
@@ -69,10 +74,9 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
}
struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
- const char *library_paths)
+pipe_loader_create_screen(struct pipe_loader_device *dev)
{
- return dev->ops->create_screen(dev, library_paths);
+ return dev->ops->create_screen(dev);
}
struct util_dl_library *
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 9b8712666bb..690d088ed82 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -82,13 +82,9 @@ pipe_loader_probe(struct pipe_loader_device **devs, int ndev);
* Create a pipe_screen for the specified device.
*
* \param dev Device the screen will be created for.
- * \param library_paths Colon-separated list of filesystem paths that
- * will be used to look for the pipe driver
- * module that handles this device.
*/
struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
- const char *library_paths);
+pipe_loader_create_screen(struct pipe_loader_device *dev);
/**
* Query the configuration parameters for the specified device.
@@ -112,8 +108,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
void
pipe_loader_release(struct pipe_loader_device **devs, int ndev);
-#ifdef HAVE_PIPE_LOADER_DRI
-
/**
* Initialize sw dri device give the drisw_loader_funcs.
*
@@ -125,7 +119,15 @@ bool
pipe_loader_sw_probe_dri(struct pipe_loader_device **devs,
struct drisw_loader_funcs *drisw_lf);
-#endif
+/**
+ * Initialize a kms backed sw device given an fd.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd);
/**
* Initialize a null sw device.
@@ -158,8 +160,6 @@ boolean
pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
struct pipe_screen *screen);
-#ifdef HAVE_PIPE_LOADER_DRM
-
/**
* Get a list of known DRM devices.
*
@@ -180,8 +180,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
bool
pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 1799df7e4c5..994a284385c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
#include <unistd.h>
#include "loader.h"
+#include "target-helpers/drm_helper_public.h"
#include "state_tracker/drm_driver.h"
#include "pipe_loader_priv.h"
@@ -50,13 +51,123 @@
struct pipe_loader_drm_device {
struct pipe_loader_device base;
+ const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
struct util_dl_library *lib;
+#endif
int fd;
};
#define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev)
-static struct pipe_loader_ops pipe_loader_drm_ops;
+static const struct pipe_loader_ops pipe_loader_drm_ops;
+
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+ DRM_CONF_INT,
+ {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+ DRM_CONF_BOOL,
+ {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+ switch (conf) {
+ case DRM_CONF_THROTTLE:
+ return &throttle_ret;
+ case DRM_CONF_SHARE_FD:
+ return &share_fd_ret;
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+ {
+ .name = "i915",
+ .driver_name = "i915",
+ .create_screen = pipe_i915_create_screen,
+ .configuration = configuration_query,
+ },
+#ifdef USE_VC4_SIMULATOR
+ /* VC4 simulator and ILO (i965) are mutually exclusive (error at
+ * configure). As the latter is unconditionally added, keep this one above
+ * it.
+ */
+ {
+ .name = "i965",
+ .driver_name = "vc4",
+ .create_screen = pipe_vc4_create_screen,
+ .configuration = configuration_query,
+ },
+#endif
+ {
+ .name = "i965",
+ .driver_name = "i915",
+ .create_screen = pipe_ilo_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "nouveau",
+ .driver_name = "nouveau",
+ .create_screen = pipe_nouveau_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "r300",
+ .driver_name = "radeon",
+ .create_screen = pipe_r300_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "r600",
+ .driver_name = "radeon",
+ .create_screen = pipe_r600_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "radeonsi",
+ .driver_name = "radeon",
+ .create_screen = pipe_radeonsi_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "vmwgfx",
+ .driver_name = "vmwgfx",
+ .create_screen = pipe_vmwgfx_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "kgsl",
+ .driver_name = "freedreno",
+ .create_screen = pipe_freedreno_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "msm",
+ .driver_name = "freedreno",
+ .create_screen = pipe_freedreno_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "virtio_gpu",
+ .driver_name = "virtio-gpu",
+ .create_screen = pipe_virgl_create_screen,
+ .configuration = configuration_query,
+ },
+ {
+ .name = "vc4",
+ .driver_name = "vc4",
+ .create_screen = pipe_vc4_create_screen,
+ .configuration = configuration_query,
+ },
+};
+#endif
bool
pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
@@ -81,10 +192,36 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
if (!ddev->base.driver_name)
goto fail;
+#ifdef GALLIUM_STATIC_TARGETS
+ for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) {
+ if (strcmp(driver_descriptors[i].name, ddev->base.driver_name) == 0) {
+ ddev->dd = &driver_descriptors[i];
+ break;
+ }
+ }
+ if (!ddev->dd)
+ goto fail;
+#else
+ ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
+ if (!ddev->lib)
+ goto fail;
+
+ ddev->dd = (const struct drm_driver_descriptor *)
+ util_dl_get_proc_address(ddev->lib, "driver_descriptor");
+
+ /* sanity check on the name */
+ if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0)
+ goto fail;
+#endif
+
*dev = &ddev->base;
return true;
fail:
+#ifndef GALLIUM_STATIC_TARGETS
+ if (ddev->lib)
+ util_dl_close(ddev->lib);
+#endif
FREE(ddev);
return false;
}
@@ -105,8 +242,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
- fd = open_drm_render_node_minor(i);
struct pipe_loader_device *dev;
+
+ fd = open_drm_render_node_minor(i);
if (fd < 0)
continue;
@@ -132,8 +270,10 @@ pipe_loader_drm_release(struct pipe_loader_device **dev)
{
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev);
+#ifndef GALLIUM_STATIC_TARGETS
if (ddev->lib)
util_dl_close(ddev->lib);
+#endif
close(ddev->fd);
FREE(ddev->base.driver_name);
@@ -146,47 +286,22 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev,
enum drm_conf conf)
{
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
- const struct drm_driver_descriptor *dd;
-
- if (!ddev->lib)
- return NULL;
-
- dd = (const struct drm_driver_descriptor *)
- util_dl_get_proc_address(ddev->lib, "driver_descriptor");
- /* sanity check on the name */
- if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
+ if (!ddev->dd->configuration)
return NULL;
- if (!dd->configuration)
- return NULL;
-
- return dd->configuration(conf);
+ return ddev->dd->configuration(conf);
}
static struct pipe_screen *
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev,
- const char *library_paths)
+pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
{
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
- const struct drm_driver_descriptor *dd;
-
- if (!ddev->lib)
- ddev->lib = pipe_loader_find_module(dev, library_paths);
- if (!ddev->lib)
- return NULL;
-
- dd = (const struct drm_driver_descriptor *)
- util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
- /* sanity check on the name */
- if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
- return NULL;
- return dd->create_screen(ddev->fd);
+ return ddev->dd->create_screen(ddev->fd);
}
-static struct pipe_loader_ops pipe_loader_drm_ops = {
+static const struct pipe_loader_ops pipe_loader_drm_ops = {
.create_screen = pipe_loader_drm_create_screen,
.configuration = pipe_loader_drm_configuration,
.release = pipe_loader_drm_release
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
index d3b025221c5..da2ca8c6e1f 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
@@ -31,8 +31,7 @@
#include "pipe_loader.h"
struct pipe_loader_ops {
- struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev,
- const char *library_paths);
+ struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev);
const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev,
enum drm_conf conf);
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 6794930193d..5539a730b4c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -30,45 +30,160 @@
#include "util/u_memory.h"
#include "util/u_dl.h"
#include "sw/dri/dri_sw_winsys.h"
+#include "sw/kms-dri/kms_dri_sw_winsys.h"
#include "sw/null/null_sw_winsys.h"
#include "sw/wrapper/wrapper_sw_winsys.h"
#include "target-helpers/inline_sw_helper.h"
#include "state_tracker/drisw_api.h"
+#include "state_tracker/sw_driver.h"
struct pipe_loader_sw_device {
struct pipe_loader_device base;
+ const struct sw_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
struct util_dl_library *lib;
+#endif
struct sw_winsys *ws;
};
#define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
-static struct pipe_loader_ops pipe_loader_sw_ops;
+static const struct pipe_loader_ops pipe_loader_sw_ops;
-static struct sw_winsys *(*backends[])() = {
- null_sw_create
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct sw_driver_descriptor driver_descriptors = {
+ .create_screen = sw_screen_create,
+ .winsys = {
+#ifdef HAVE_PIPE_LOADER_DRI
+ {
+ .name = "dri",
+ .create_winsys = dri_create_sw_winsys,
+ },
+#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+ {
+ .name = "kms_dri",
+ .create_winsys = kms_dri_create_winsys,
+ },
+#endif
+/**
+ * XXX: Do not include these two for non-autotools builds.
+ * They don't have either opencl or nine, where these are used.
+ */
+#ifndef DROP_PIPE_LOADER_MISC
+ {
+ .name = "null",
+ .create_winsys = null_sw_create,
+ },
+ {
+ .name = "wrapped",
+ .create_winsys = wrapper_sw_winsys_wrap_pipe_screen,
+ },
+#endif
+ { 0 },
+ }
};
+#endif
+
+static bool
+pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
+{
+ sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
+ sdev->base.driver_name = "swrast";
+ sdev->base.ops = &pipe_loader_sw_ops;
+
+#ifdef GALLIUM_STATIC_TARGETS
+ sdev->dd = &driver_descriptors;
+ if (!sdev->dd)
+ return false;
+#else
+ sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
+ if (!sdev->lib)
+ return false;
+
+ sdev->dd = (const struct sw_driver_descriptor *)
+ util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor");
+
+ if (!sdev->dd){
+ util_dl_close(sdev->lib);
+ sdev->lib = NULL;
+ return false;
+ }
+#endif
+
+ return true;
+}
+
+static void
+pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev)
+{
+#ifndef GALLIUM_STATIC_TARGETS
+ if (sdev->lib)
+ util_dl_close(sdev->lib);
+#endif
+}
#ifdef HAVE_PIPE_LOADER_DRI
bool
pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
{
struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+ int i;
if (!sdev)
return false;
- sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
- sdev->base.driver_name = "swrast";
- sdev->base.ops = &pipe_loader_sw_ops;
- sdev->ws = dri_create_sw_winsys(drisw_lf);
- if (!sdev->ws) {
- FREE(sdev);
- return false;
+ if (!pipe_loader_sw_probe_init_common(sdev))
+ goto fail;
+
+ for (i = 0; sdev->dd->winsys[i].name; i++) {
+ if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) {
+ sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf);
+ break;
+ }
}
+ if (!sdev->ws)
+ goto fail;
+
*devs = &sdev->base;
+ return true;
+
+fail:
+ pipe_loader_sw_probe_teardown_common(sdev);
+ FREE(sdev);
+ return false;
+}
+#endif
+
+#ifdef HAVE_PIPE_LOADER_KMS
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
+{
+ struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+ int i;
+ if (!sdev)
+ return false;
+
+ if (!pipe_loader_sw_probe_init_common(sdev))
+ goto fail;
+
+ for (i = 0; sdev->dd->winsys[i].name; i++) {
+ if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
+ sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
+ break;
+ }
+ }
+ if (!sdev->ws)
+ goto fail;
+
+ *devs = &sdev->base;
return true;
+
+fail:
+ pipe_loader_sw_probe_teardown_common(sdev);
+ FREE(sdev);
+ return false;
}
#endif
@@ -76,38 +191,40 @@ bool
pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
{
struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+ int i;
if (!sdev)
return false;
- sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
- sdev->base.driver_name = "swrast";
- sdev->base.ops = &pipe_loader_sw_ops;
- sdev->ws = null_sw_create();
- if (!sdev->ws) {
- FREE(sdev);
- return false;
+ if (!pipe_loader_sw_probe_init_common(sdev))
+ goto fail;
+
+ for (i = 0; sdev->dd->winsys[i].name; i++) {
+ if (strcmp(sdev->dd->winsys[i].name, "null") == 0) {
+ sdev->ws = sdev->dd->winsys[i].create_winsys();
+ break;
+ }
}
- *devs = &sdev->base;
+ if (!sdev->ws)
+ goto fail;
+ *devs = &sdev->base;
return true;
+
+fail:
+ pipe_loader_sw_probe_teardown_common(sdev);
+ FREE(sdev);
+ return false;
}
int
pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
{
- int i;
-
- for (i = 0; i < Elements(backends); i++) {
- if (i < ndev) {
- struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
- /* TODO: handle CALLOC_STRUCT failure */
+ int i = 1;
- sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
- sdev->base.driver_name = "swrast";
- sdev->base.ops = &pipe_loader_sw_ops;
- sdev->ws = backends[i]();
- devs[i] = &sdev->base;
+ if (i < ndev) {
+ if (!pipe_loader_sw_probe_null(devs)) {
+ i--;
}
}
@@ -119,21 +236,30 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
struct pipe_screen *screen)
{
struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+ int i;
if (!sdev)
return false;
- sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
- sdev->base.driver_name = "swrast";
- sdev->base.ops = &pipe_loader_sw_ops;
- sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen);
+ if (!pipe_loader_sw_probe_init_common(sdev))
+ goto fail;
- if (!sdev->ws) {
- FREE(sdev);
- return false;
+ for (i = 0; sdev->dd->winsys[i].name; i++) {
+ if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) {
+ sdev->ws = sdev->dd->winsys[i].create_winsys(screen);
+ break;
+ }
}
+ if (!sdev->ws)
+ goto fail;
+
*dev = &sdev->base;
return true;
+
+fail:
+ pipe_loader_sw_probe_teardown_common(sdev);
+ FREE(sdev);
+ return false;
}
static void
@@ -141,8 +267,10 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
{
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);
+#ifndef GALLIUM_STATIC_TARGETS
if (sdev->lib)
util_dl_close(sdev->lib);
+#endif
FREE(sdev);
*dev = NULL;
@@ -156,28 +284,19 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev,
}
static struct pipe_screen *
-pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
- const char *library_paths)
+pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
{
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
- struct pipe_screen *(*init)(struct sw_winsys *);
+ struct pipe_screen *screen;
- if (!sdev->lib)
- sdev->lib = pipe_loader_find_module(dev, library_paths);
- if (!sdev->lib)
- return NULL;
-
- init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen");
- if (!init){
- util_dl_close(sdev->lib);
- sdev->lib = NULL;
- return NULL;
- }
+ screen = sdev->dd->create_screen(sdev->ws);
+ if (!screen)
+ sdev->ws->destroy(sdev->ws);
- return init(sdev->ws);
+ return screen;
}
-static struct pipe_loader_ops pipe_loader_sw_ops = {
+static const struct pipe_loader_ops pipe_loader_sw_ops = {
.create_screen = pipe_loader_sw_create_screen,
.configuration = pipe_loader_sw_configuration,
.release = pipe_loader_sw_release
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
new file mode 100644
index 00000000000..332b1cba984
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -0,0 +1,275 @@
+#ifndef DRM_HELPER_H
+#define DRM_HELPER_H
+
+#include <stdio.h>
+#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/drm_helper_public.h"
+
+#ifdef GALLIUM_I915
+#include "i915/drm/i915_drm_public.h"
+#include "i915/i915_public.h"
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+ struct i915_winsys *iws;
+ struct pipe_screen *screen;
+
+ iws = i915_drm_winsys_create(fd);
+ if (!iws)
+ return NULL;
+
+ screen = i915_screen_create(iws);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+ fprintf(stderr, "i915g: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_ILO
+#include "intel/drm/intel_drm_public.h"
+#include "ilo/ilo_public.h"
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+ struct intel_winsys *iws;
+ struct pipe_screen *screen;
+
+ iws = intel_winsys_create_for_fd(fd);
+ if (!iws)
+ return NULL;
+
+ screen = ilo_screen_create(iws);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+ fprintf(stderr, "ilo: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_NOUVEAU
+#include "nouveau/drm/nouveau_drm_public.h"
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+ struct pipe_screen *screen;
+
+ screen = nouveau_drm_screen_create(fd);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+ fprintf(stderr, "nouveau: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R300
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+ struct radeon_winsys *rw;
+
+ rw = radeon_drm_winsys_create(fd, r300_screen_create);
+ return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+ fprintf(stderr, "r300: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R600
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+ struct radeon_winsys *rw;
+
+ rw = radeon_drm_winsys_create(fd, r600_screen_create);
+ return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+ fprintf(stderr, "r600: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_RADEONSI
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
+#include "radeonsi/si_public.h"
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+ struct radeon_winsys *rw;
+
+ /* First, try amdgpu. */
+ rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+ if (!rw)
+ rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
+ return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+ fprintf(stderr, "radeonsi: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VMWGFX
+#include "svga/drm/svga_drm_public.h"
+#include "svga/svga_public.h"
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+ struct svga_winsys_screen *sws;
+ struct pipe_screen *screen;
+
+ sws = svga_drm_winsys_screen_create(fd);
+ if (!sws)
+ return NULL;
+
+ screen = svga_screen_create(sws);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+ fprintf(stderr, "svga: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_FREEDRENO
+#include "freedreno/drm/freedreno_drm_public.h"
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+ struct pipe_screen *screen;
+
+ screen = fd_drm_screen_create(fd);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+ fprintf(stderr, "freedreno: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VIRGL
+#include "virgl/drm/virgl_drm_public.h"
+#include "virgl/virgl_public.h"
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+ struct virgl_winsys *vws;
+ struct pipe_screen *screen;
+
+ vws = virgl_drm_winsys_create(fd);
+ if (!vws)
+ return NULL;
+
+ screen = virgl_create_screen(vws);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+ fprintf(stderr, "virgl: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VC4
+#include "vc4/drm/vc4_drm_public.h"
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+ struct pipe_screen *screen;
+
+ screen = vc4_drm_screen_create(fd);
+ return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+ fprintf(stderr, "vc4: driver missing\n");
+ return NULL;
+}
+
+#endif
+
+
+#endif /* DRM_HELPER_H */
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
new file mode 100644
index 00000000000..d1f9382a6f9
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
@@ -0,0 +1,37 @@
+#ifndef _DRM_HELPER_PUBLIC_H
+#define _DRM_HELPER_PUBLIC_H
+
+
+struct pipe_screen;
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd);
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd);
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd);
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd);
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd);
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd);
+
+#endif /* _DRM_HELPER_PUBLIC_H */
diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
deleted file mode 100644
index 6ca4dc8136c..00000000000
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ /dev/null
@@ -1,531 +0,0 @@
-#ifndef INLINE_DRM_HELPER_H
-#define INLINE_DRM_HELPER_H
-
-#include "state_tracker/drm_driver.h"
-#include "target-helpers/inline_debug_helper.h"
-#include "loader.h"
-#if defined(DRI_TARGET)
-#include "dri_screen.h"
-#endif
-
-#if GALLIUM_SOFTPIPE
-#include "target-helpers/inline_sw_helper.h"
-#include "sw/kms-dri/kms_dri_sw_winsys.h"
-#endif
-
-#if GALLIUM_I915
-#include "i915/drm/i915_drm_public.h"
-#include "i915/i915_public.h"
-#endif
-
-#if GALLIUM_ILO
-#include "intel/drm/intel_drm_public.h"
-#include "ilo/ilo_public.h"
-#endif
-
-#if GALLIUM_NOUVEAU
-#include "nouveau/drm/nouveau_drm_public.h"
-#endif
-
-#if GALLIUM_R300
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r300/r300_public.h"
-#endif
-
-#if GALLIUM_R600
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r600/r600_public.h"
-#endif
-
-#if GALLIUM_RADEONSI
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "amdgpu/drm/amdgpu_public.h"
-#include "radeonsi/si_public.h"
-#endif
-
-#if GALLIUM_VMWGFX
-#include "svga/drm/svga_drm_public.h"
-#include "svga/svga_public.h"
-#endif
-
-#if GALLIUM_FREEDRENO
-#include "freedreno/drm/freedreno_drm_public.h"
-#endif
-
-#if GALLIUM_VC4
-#include "vc4/drm/vc4_drm_public.h"
-#endif
-
-#if GALLIUM_VIRGL
-#include "virgl/drm/virgl_drm_public.h"
-#include "virgl/virgl_public.h"
-#endif
-
-static char* driver_name = NULL;
-
-/* XXX: We need to teardown the winsys if *screen_create() fails. */
-
-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#if defined(HAVE_LIBDRM)
-
-const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
-{
- globalDriverAPI = &dri_kms_driver_api;
- return galliumdrm_driver_extensions;
-}
-
-struct pipe_screen *
-kms_swrast_create_screen(int fd)
-{
- struct sw_winsys *sws;
- struct pipe_screen *screen;
-
- sws = kms_dri_create_winsys(fd);
- if (!sws)
- return NULL;
-
- screen = sw_screen_create(sws);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-#endif
-#endif
-
-#if defined(GALLIUM_I915)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i915(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_i915_create_screen(int fd)
-{
- struct i915_winsys *iws;
- struct pipe_screen *screen;
-
- iws = i915_drm_winsys_create(fd);
- if (!iws)
- return NULL;
-
- screen = i915_screen_create(iws);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_ILO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_ilo_create_screen(int fd)
-{
- struct intel_winsys *iws;
- struct pipe_screen *screen;
-
- iws = intel_winsys_create_for_fd(fd);
- if (!iws)
- return NULL;
-
- screen = ilo_screen_create(iws);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_NOUVEAU)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_nouveau(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_nouveau_create_screen(int fd)
-{
- struct pipe_screen *screen;
-
- screen = nouveau_drm_screen_create(fd);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R300)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r300(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_r300_create_screen(int fd)
-{
- struct radeon_winsys *rw;
-
- rw = radeon_drm_winsys_create(fd, r300_screen_create);
- return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R600)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r600(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_r600_create_screen(int fd)
-{
- struct radeon_winsys *rw;
-
- rw = radeon_drm_winsys_create(fd, r600_screen_create);
- return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_RADEONSI)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_radeonsi(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_radeonsi_create_screen(int fd)
-{
- struct radeon_winsys *rw;
-
- /* First, try amdgpu. */
- rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
-
- if (!rw)
- rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
-
- return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VMWGFX)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vmwgfx(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd)
-{
- struct svga_winsys_screen *sws;
- struct pipe_screen *screen;
-
- sws = svga_drm_winsys_screen_create(fd);
- if (!sws)
- return NULL;
-
- screen = svga_screen_create(sws);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_FREEDRENO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_msm(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-
-const __DRIextension **__driDriverGetExtensions_kgsl(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_freedreno_create_screen(int fd)
-{
- struct pipe_screen *screen;
-
- screen = fd_drm_screen_create(fd);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VIRGL)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_virgl_create_screen(int fd)
-{
- struct virgl_winsys *vws;
- struct pipe_screen *screen;
-
- vws = virgl_drm_winsys_create(fd);
- if (!vws)
- return NULL;
-
- screen = virgl_create_screen(vws);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VC4)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vc4(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-
-#if defined(USE_VC4_SIMULATOR)
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-/**
- * When building using the simulator (on x86), we advertise ourselves as the
- * i965 driver so that you can just make a directory with a link from
- * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
- * on your i965-using host to run the driver under simulation.
- *
- * This is, of course, incompatible with building with the ilo driver, but you
- * shouldn't be building that anyway.
- */
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
- globalDriverAPI = &galliumdrm_driver_api;
- return galliumdrm_driver_extensions;
-}
-#endif
-
-#endif
-
-static struct pipe_screen *
-pipe_vc4_create_screen(int fd)
-{
- struct pipe_screen *screen;
-
- screen = vc4_drm_screen_create(fd);
- return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-inline struct pipe_screen *
-dd_create_screen(int fd)
-{
- driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
- if (!driver_name)
- return NULL;
-
-#if defined(GALLIUM_I915)
- if (strcmp(driver_name, "i915") == 0)
- return pipe_i915_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_ILO)
- if (strcmp(driver_name, "i965") == 0)
- return pipe_ilo_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_NOUVEAU)
- if (strcmp(driver_name, "nouveau") == 0)
- return pipe_nouveau_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_R300)
- if (strcmp(driver_name, "r300") == 0)
- return pipe_r300_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_R600)
- if (strcmp(driver_name, "r600") == 0)
- return pipe_r600_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_RADEONSI)
- if (strcmp(driver_name, "radeonsi") == 0)
- return pipe_radeonsi_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_VMWGFX)
- if (strcmp(driver_name, "vmwgfx") == 0)
- return pipe_vmwgfx_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_FREEDRENO)
- if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
- return pipe_freedreno_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_VIRGL)
- if ((strcmp(driver_name, "virtio_gpu") == 0))
- return pipe_virgl_create_screen(fd);
- else
-#endif
-#if defined(GALLIUM_VC4)
- if (strcmp(driver_name, "vc4") == 0)
- return pipe_vc4_create_screen(fd);
- else
-#if defined(USE_VC4_SIMULATOR)
- if (strcmp(driver_name, "i965") == 0)
- return pipe_vc4_create_screen(fd);
- else
-#endif
-#endif
- return NULL;
-}
-
-inline const char *
-dd_driver_name(void)
-{
- return driver_name;
-}
-
-static const struct drm_conf_ret throttle_ret = {
- DRM_CONF_INT,
- {2},
-};
-
-static const struct drm_conf_ret share_fd_ret = {
- DRM_CONF_BOOL,
- {true},
-};
-
-static inline const struct drm_conf_ret *
-configuration_query(enum drm_conf conf)
-{
- switch (conf) {
- case DRM_CONF_THROTTLE:
- return &throttle_ret;
- case DRM_CONF_SHARE_FD:
- return &share_fd_ret;
- default:
- break;
- }
- return NULL;
-}
-
-inline const struct drm_conf_ret *
-dd_configuration(enum drm_conf conf)
-{
- if (!driver_name)
- return NULL;
-
-#if defined(GALLIUM_I915)
- if (strcmp(driver_name, "i915") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_ILO)
- if (strcmp(driver_name, "i965") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_NOUVEAU)
- if (strcmp(driver_name, "nouveau") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_R300)
- if (strcmp(driver_name, "r300") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_R600)
- if (strcmp(driver_name, "r600") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_RADEONSI)
- if (strcmp(driver_name, "radeonsi") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_VMWGFX)
- if (strcmp(driver_name, "vmwgfx") == 0)
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_FREEDRENO)
- if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_VIRGL)
- if ((strcmp(driver_name, "virtio_gpu") == 0))
- return configuration_query(conf);
- else
-#endif
-#if defined(GALLIUM_VC4)
- if (strcmp(driver_name, "vc4") == 0)
- return configuration_query(conf);
- else
-#if defined(USE_VC4_SIMULATOR)
- if (strcmp(driver_name, "i965") == 0)
- return configuration_query(conf);
- else
-#endif
-#endif
- return NULL;
-}
-#endif /* INLINE_DRM_HELPER_H */
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index f3693fb1f39..a9ab16f2b54 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -69,69 +69,4 @@ sw_screen_create(struct sw_winsys *winsys)
return sw_screen_create_named(winsys, driver);
}
-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#include "target-helpers/inline_debug_helper.h"
-#include "sw/dri/dri_sw_winsys.h"
-#include "dri_screen.h"
-
-const __DRIextension **__driDriverGetExtensions_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
-{
- globalDriverAPI = &galliumsw_driver_api;
- return galliumsw_driver_extensions;
-}
-
-inline struct pipe_screen *
-drisw_create_screen(struct drisw_loader_funcs *lf)
-{
- struct sw_winsys *winsys = NULL;
- struct pipe_screen *screen = NULL;
-
- winsys = dri_create_sw_winsys(lf);
- if (winsys == NULL)
- return NULL;
-
- screen = sw_screen_create(winsys);
- if (screen == NULL) {
- winsys->destroy(winsys);
- return NULL;
- }
-
- screen = debug_screen_wrap(screen);
- return screen;
-}
-#endif // DRI_TARGET
-
-#if defined(NINE_TARGET)
-#include "sw/wrapper/wrapper_sw_winsys.h"
-#include "target-helpers/inline_debug_helper.h"
-
-extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
-
-inline struct pipe_screen *
-ninesw_create_screen(struct pipe_screen *pscreen)
-{
- struct sw_winsys *winsys = NULL;
- struct pipe_screen *screen = NULL;
-
- winsys = wrapper_sw_winsys_wrap_pipe_screen(pscreen);
- if (winsys == NULL)
- return NULL;
-
- screen = sw_screen_create(winsys);
- if (screen == NULL) {
- winsys->destroy(winsys);
- return NULL;
- }
-
- screen = debug_screen_wrap(screen);
- return screen;
-}
-#endif // NINE_TARGET
-
-#endif // GALLIUM_SOFTPIPE
-
-
#endif
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c
index aca435d6cad..9b97d8dc4b9 100644
--- a/src/gallium/auxiliary/util/u_dl.c
+++ b/src/gallium/auxiliary/util/u_dl.c
@@ -45,7 +45,7 @@ struct util_dl_library *
util_dl_open(const char *filename)
{
#if defined(PIPE_OS_UNIX)
- return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL);
+ return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_LOCAL);
#elif defined(PIPE_OS_WINDOWS)
return (struct util_dl_library *)LoadLibraryA(filename);
#else
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index d3b77e6b99b..c26d7331d4c 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -202,6 +202,36 @@ PIPE_FORMAT_BPTC_SRGBA , bptc, 4, 4, x128, , , , xyzw, sr
PIPE_FORMAT_BPTC_RGB_FLOAT , bptc, 4, 4, x128, , , , xyz1, rgb
PIPE_FORMAT_BPTC_RGB_UFLOAT , bptc, 4, 4, x128, , , , xyz1, rgb
+PIPE_FORMAT_ASTC_4x4 , astc, 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_5x4 , astc, 5, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_5x5 , astc, 5, 5, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_6x5 , astc, 6, 5, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_6x6 , astc, 6, 6, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_8x5 , astc, 8, 5, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_8x6 , astc, 8, 6, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_8x8 , astc, 8, 8, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_10x5 , astc,10, 5, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_10x6 , astc,10, 6, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_10x8 , astc,10, 8, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_10x10 , astc,10,10, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_12x10 , astc,12,10, x128, , , , xyzw, rgb
+PIPE_FORMAT_ASTC_12x12 , astc,12,12, x128, , , , xyzw, rgb
+
+PIPE_FORMAT_ASTC_4x4_SRGB , astc, 4, 4, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_5x4_SRGB , astc, 5, 4, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_5x5_SRGB , astc, 5, 5, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_6x5_SRGB , astc, 6, 5, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_6x6_SRGB , astc, 6, 6, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_8x5_SRGB , astc, 8, 5, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_8x6_SRGB , astc, 8, 6, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_8x8_SRGB , astc, 8, 8, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_10x5_SRGB , astc,10, 5, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_10x6_SRGB , astc,10, 6, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_10x8_SRGB , astc,10, 8, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_10x10_SRGB , astc,10,10, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_12x10_SRGB , astc,12,10, x128, , , , xyzw, srgb
+PIPE_FORMAT_ASTC_12x12_SRGB , astc,12,12, x128, , , , xyzw, srgb
+
# Straightforward D3D10-like formats (also used for
# vertex buffer element description)
#
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index a1b1b28fa41..ffdb864fa83 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -84,9 +84,14 @@ enum util_format_layout {
UTIL_FORMAT_LAYOUT_BPTC = 7,
/**
+ * ASTC
+ */
+ UTIL_FORMAT_LAYOUT_ASTC = 8,
+
+ /**
* Everything else that doesn't fit in any of the above layouts.
*/
- UTIL_FORMAT_LAYOUT_OTHER = 8
+ UTIL_FORMAT_LAYOUT_OTHER = 9
};
@@ -481,6 +486,7 @@ util_format_is_compressed(enum pipe_format format)
case UTIL_FORMAT_LAYOUT_RGTC:
case UTIL_FORMAT_LAYOUT_ETC:
case UTIL_FORMAT_LAYOUT_BPTC:
+ case UTIL_FORMAT_LAYOUT_ASTC:
/* XXX add other formats in the future */
return TRUE;
default:
@@ -924,6 +930,35 @@ util_format_srgb(enum pipe_format format)
return PIPE_FORMAT_B5G6R5_SRGB;
case PIPE_FORMAT_BPTC_RGBA_UNORM:
return PIPE_FORMAT_BPTC_SRGBA;
+ case PIPE_FORMAT_ASTC_4x4:
+ return PIPE_FORMAT_ASTC_4x4_SRGB;
+ case PIPE_FORMAT_ASTC_5x4:
+ return PIPE_FORMAT_ASTC_5x4_SRGB;
+ case PIPE_FORMAT_ASTC_5x5:
+ return PIPE_FORMAT_ASTC_5x5_SRGB;
+ case PIPE_FORMAT_ASTC_6x5:
+ return PIPE_FORMAT_ASTC_6x5_SRGB;
+ case PIPE_FORMAT_ASTC_6x6:
+ return PIPE_FORMAT_ASTC_6x6_SRGB;
+ case PIPE_FORMAT_ASTC_8x5:
+ return PIPE_FORMAT_ASTC_8x5_SRGB;
+ case PIPE_FORMAT_ASTC_8x6:
+ return PIPE_FORMAT_ASTC_8x6_SRGB;
+ case PIPE_FORMAT_ASTC_8x8:
+ return PIPE_FORMAT_ASTC_8x8_SRGB;
+ case PIPE_FORMAT_ASTC_10x5:
+ return PIPE_FORMAT_ASTC_10x5_SRGB;
+ case PIPE_FORMAT_ASTC_10x6:
+ return PIPE_FORMAT_ASTC_10x6_SRGB;
+ case PIPE_FORMAT_ASTC_10x8:
+ return PIPE_FORMAT_ASTC_10x8_SRGB;
+ case PIPE_FORMAT_ASTC_10x10:
+ return PIPE_FORMAT_ASTC_10x10_SRGB;
+ case PIPE_FORMAT_ASTC_12x10:
+ return PIPE_FORMAT_ASTC_12x10_SRGB;
+ case PIPE_FORMAT_ASTC_12x12:
+ return PIPE_FORMAT_ASTC_12x12_SRGB;
+
default:
return PIPE_FORMAT_NONE;
}
@@ -971,6 +1006,34 @@ util_format_linear(enum pipe_format format)
return PIPE_FORMAT_B5G6R5_UNORM;
case PIPE_FORMAT_BPTC_SRGBA:
return PIPE_FORMAT_BPTC_RGBA_UNORM;
+ case PIPE_FORMAT_ASTC_4x4_SRGB:
+ return PIPE_FORMAT_ASTC_4x4;
+ case PIPE_FORMAT_ASTC_5x4_SRGB:
+ return PIPE_FORMAT_ASTC_5x4;
+ case PIPE_FORMAT_ASTC_5x5_SRGB:
+ return PIPE_FORMAT_ASTC_5x5;
+ case PIPE_FORMAT_ASTC_6x5_SRGB:
+ return PIPE_FORMAT_ASTC_6x5;
+ case PIPE_FORMAT_ASTC_6x6_SRGB:
+ return PIPE_FORMAT_ASTC_6x6;
+ case PIPE_FORMAT_ASTC_8x5_SRGB:
+ return PIPE_FORMAT_ASTC_8x5;
+ case PIPE_FORMAT_ASTC_8x6_SRGB:
+ return PIPE_FORMAT_ASTC_8x6;
+ case PIPE_FORMAT_ASTC_8x8_SRGB:
+ return PIPE_FORMAT_ASTC_8x8;
+ case PIPE_FORMAT_ASTC_10x5_SRGB:
+ return PIPE_FORMAT_ASTC_10x5;
+ case PIPE_FORMAT_ASTC_10x6_SRGB:
+ return PIPE_FORMAT_ASTC_10x6;
+ case PIPE_FORMAT_ASTC_10x8_SRGB:
+ return PIPE_FORMAT_ASTC_10x8;
+ case PIPE_FORMAT_ASTC_10x10_SRGB:
+ return PIPE_FORMAT_ASTC_10x10;
+ case PIPE_FORMAT_ASTC_12x10_SRGB:
+ return PIPE_FORMAT_ASTC_12x10;
+ case PIPE_FORMAT_ASTC_12x12_SRGB:
+ return PIPE_FORMAT_ASTC_12x12;
default:
return format;
}
diff --git a/src/gallium/auxiliary/util/u_format_fake.c b/src/gallium/auxiliary/util/u_format_fake.c
deleted file mode 100644
index 77e896d27bd..00000000000
--- a/src/gallium/auxiliary/util/u_format_fake.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "u_format.h"
-#include "u_format_fake.h"
-
-#define fake(format) \
-void \
-util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} \
-\
-void \
-util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);}
-
-fake(bptc_rgba_unorm)
-fake(bptc_srgba)
-fake(bptc_rgb_float)
-fake(bptc_rgb_ufloat)
-
-fake(etc2_rgb8)
-fake(etc2_srgb8)
-fake(etc2_rgb8a1)
-fake(etc2_srgb8a1)
-fake(etc2_rgba8)
-fake(etc2_srgba8)
-fake(etc2_r11_unorm)
-fake(etc2_r11_snorm)
-fake(etc2_rg11_unorm)
-fake(etc2_rg11_snorm)
diff --git a/src/gallium/auxiliary/util/u_format_fake.h b/src/gallium/auxiliary/util/u_format_fake.h
deleted file mode 100644
index e6bfd4e1594..00000000000
--- a/src/gallium/auxiliary/util/u_format_fake.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Red Hat Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- **************************************************************************/
-
-#ifndef U_FORMAT_FAKE_H_
-#define U_FORMAT_FAKE_H_
-
-#define __format_fake(format) \
-void \
-util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); \
-\
-void \
-util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-__format_fake(bptc_rgba_unorm)
-__format_fake(bptc_srgba)
-__format_fake(bptc_rgb_float)
-__format_fake(bptc_rgb_ufloat)
-
-__format_fake(etc2_rgb8)
-__format_fake(etc2_srgb8)
-__format_fake(etc2_rgb8a1)
-__format_fake(etc2_srgb8a1)
-__format_fake(etc2_rgba8)
-__format_fake(etc2_srgba8)
-__format_fake(etc2_r11_unorm)
-__format_fake(etc2_r11_snorm)
-__format_fake(etc2_rg11_unorm)
-__format_fake(etc2_rg11_snorm)
-
-#endif
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index fb42de723c4..d4bb1de4cb5 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -686,7 +686,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
def is_format_hand_written(format):
- return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'subsampled', 'other') or format.colorspace == ZS
+ return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'astc', 'subsampled', 'other') or format.colorspace == ZS
def generate(formats):
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index aceb0caf7e1..879d10ff01d 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -90,7 +90,6 @@ def write_format_table(formats):
print '#include "u_format_rgtc.h"'
print '#include "u_format_latc.h"'
print '#include "u_format_etc.h"'
- print '#include "u_format_fake.h"'
print
u_format_pack.generate(formats)
@@ -139,10 +138,15 @@ def write_format_table(formats):
u_format_pack.print_channels(format, do_channel_array)
u_format_pack.print_channels(format, do_swizzle_array)
print " %s," % (colorspace_map(format.colorspace),)
- if format.colorspace != ZS and not format.is_pure_color():
+ access = True
+ if format.layout in ('bptc', 'astc'):
+ access = False
+ if format.layout == 'etc' and format.short_name() != 'etc1_rgb8':
+ access = False
+ if format.colorspace != ZS and not format.is_pure_color() and access:
print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
- if format.layout == 's3tc' or format.layout == 'rgtc' or format.layout == 'bptc':
+ if format.layout == 's3tc' or format.layout == 'rgtc':
print " &util_format_%s_fetch_rgba_8unorm," % format.short_name()
else:
print " NULL, /* fetch_rgba_8unorm */"
diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index df01917466f..1af7653d650 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -42,34 +42,31 @@ struct pipe_loader_device;
struct vl_screen
{
- struct pipe_screen *pscreen;
- struct pipe_loader_device *dev;
-};
+ void (*destroy)(struct vl_screen *vscreen);
-struct vl_screen*
-vl_screen_create(Display *display, int screen);
+ struct pipe_resource *
+ (*texture_from_drawable)(struct vl_screen *vscreen, void *drawable);
-void vl_screen_destroy(struct vl_screen *vscreen);
+ struct u_rect *
+ (*get_dirty_area)(struct vl_screen *vscreen);
-struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable);
+ uint64_t
+ (*get_timestamp)(struct vl_screen *vscreen, void *drawable);
-struct u_rect *
-vl_screen_get_dirty_area(struct vl_screen *vscreen);
+ void
+ (*set_next_timestamp)(struct vl_screen *vscreen, uint64_t stamp);
-uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable);
+ void *
+ (*get_private)(struct vl_screen *vscreen);
-void
-vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp);
+ struct pipe_screen *pscreen;
+ struct pipe_loader_device *dev;
+};
-void*
-vl_screen_get_private(struct vl_screen *vscreen);
+struct vl_screen *
+vl_dri2_screen_create(Display *display, int screen);
-struct vl_screen*
+struct vl_screen *
vl_drm_screen_create(int fd);
-void
-vl_drm_screen_destroy(struct vl_screen *vscreen);
-
#endif
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 3b1b87f9523..ae0d4cdee1b 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -73,24 +73,27 @@ struct vl_dri_screen
int64_t last_ust, ns_frame, last_msc, next_msc;
};
-static const unsigned int attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT };
+static const unsigned attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT };
+
+static void vl_dri2_screen_destroy(struct vl_screen *vscreen);
static void
-vl_dri2_handle_stamps(struct vl_dri_screen* scrn,
+vl_dri2_handle_stamps(struct vl_dri_screen *scrn,
uint32_t ust_hi, uint32_t ust_lo,
uint32_t msc_hi, uint32_t msc_lo)
{
int64_t ust = ((((uint64_t)ust_hi) << 32) | ust_lo) * 1000;
int64_t msc = (((uint64_t)msc_hi) << 32) | msc_lo;
- if (scrn->last_ust && scrn->last_msc && (ust > scrn->last_ust) && (msc > scrn->last_msc))
+ if (scrn->last_ust && (ust > scrn->last_ust) &&
+ scrn->last_msc && (msc > scrn->last_msc))
scrn->ns_frame = (ust - scrn->last_ust) / (msc - scrn->last_msc);
scrn->last_ust = ust;
scrn->last_msc = msc;
}
-static xcb_dri2_get_buffers_reply_t*
+static xcb_dri2_get_buffers_reply_t *
vl_dri2_get_flush_reply(struct vl_dri_screen *scrn)
{
xcb_dri2_wait_sbc_reply_t *wait_sbc_reply;
@@ -120,7 +123,7 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
unsigned level, unsigned layer,
void *context_private, struct pipe_box *sub_box)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)context_private;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)context_private;
uint32_t msc_hi, msc_lo;
assert(screen);
@@ -132,9 +135,11 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
msc_hi = scrn->next_msc >> 32;
msc_lo = scrn->next_msc & 0xFFFFFFFF;
- scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, msc_hi, msc_lo, 0, 0, 0, 0);
+ scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable,
+ msc_hi, msc_lo, 0, 0, 0, 0);
scrn->wait_cookie = xcb_dri2_wait_sbc_unchecked(scrn->conn, scrn->drawable, 0, 0);
- scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, 1, 1, attachments);
+ scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable,
+ 1, 1, attachments);
scrn->flushed = true;
scrn->current_buffer = !scrn->current_buffer;
@@ -170,10 +175,10 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable)
scrn->drawable = drawable;
}
-struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable)
+static struct pipe_resource *
+vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
struct winsys_handle dri2_handle;
struct pipe_resource template, *tex;
@@ -185,11 +190,12 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable)
assert(scrn);
- vl_dri2_set_drawable(scrn, drawable);
+ vl_dri2_set_drawable(scrn, (Drawable)drawable);
reply = vl_dri2_get_flush_reply(scrn);
if (!reply) {
xcb_dri2_get_buffers_cookie_t cookie;
- cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, drawable, 1, 1, attachments);
+ cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, (Drawable)drawable,
+ 1, 1, attachments);
reply = xcb_dri2_get_buffers_reply(scrn->conn, cookie, NULL);
}
if (!reply)
@@ -241,32 +247,33 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable)
template.bind = PIPE_BIND_RENDER_TARGET;
template.flags = 0;
- tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, &dri2_handle);
+ tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template,
+ &dri2_handle);
free(reply);
return tex;
}
-struct u_rect *
-vl_screen_get_dirty_area(struct vl_screen *vscreen)
+static struct u_rect *
+vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
assert(scrn);
return &scrn->dirty_areas[scrn->current_buffer];
}
-uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable)
+static uint64_t
+vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
xcb_dri2_get_msc_cookie_t cookie;
xcb_dri2_get_msc_reply_t *reply;
assert(scrn);
- vl_dri2_set_drawable(scrn, drawable);
+ vl_dri2_set_drawable(scrn, (Drawable)drawable);
if (!scrn->last_ust) {
- cookie = xcb_dri2_get_msc_unchecked(scrn->conn, drawable);
+ cookie = xcb_dri2_get_msc_unchecked(scrn->conn, (Drawable)drawable);
reply = xcb_dri2_get_msc_reply(scrn->conn, cookie, NULL);
if (reply) {
@@ -278,19 +285,20 @@ vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable)
return scrn->last_ust;
}
-void
-vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
+static void
+vl_dri2_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
assert(scrn);
if (stamp && scrn->last_ust && scrn->ns_frame && scrn->last_msc)
- scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / scrn->ns_frame + scrn->last_msc;
+ scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) /
+ scrn->ns_frame + scrn->last_msc;
else
scrn->next_msc = 0;
}
-void*
-vl_screen_get_private(struct vl_screen *vscreen)
+static void *
+vl_dri2_screen_get_private(struct vl_screen *vscreen)
{
return vscreen;
}
@@ -305,8 +313,8 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen)
return NULL;
}
-struct vl_screen*
-vl_screen_create(Display *display, int screen)
+struct vl_screen *
+vl_dri2_screen_create(Display *display, int screen)
{
struct vl_dri_screen *scrn;
const xcb_query_extension_reply_t *extension;
@@ -320,7 +328,7 @@ vl_screen_create(Display *display, int screen)
xcb_generic_error_t *error = NULL;
char *device_name;
int fd, device_name_length;
- unsigned int driverType;
+ unsigned driverType;
drm_magic_t magic;
@@ -340,7 +348,9 @@ vl_screen_create(Display *display, int screen)
if (!(extension && extension->present))
goto free_screen;
- dri2_query_cookie = xcb_dri2_query_version (scrn->conn, XCB_DRI2_MAJOR_VERSION, XCB_DRI2_MINOR_VERSION);
+ dri2_query_cookie = xcb_dri2_query_version (scrn->conn,
+ XCB_DRI2_MAJOR_VERSION,
+ XCB_DRI2_MINOR_VERSION);
dri2_query = xcb_dri2_query_version_reply (scrn->conn, dri2_query_cookie, &error);
if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2)
goto free_query;
@@ -352,7 +362,7 @@ vl_screen_create(Display *display, int screen)
{
char *prime = getenv("DRI_PRIME");
if (prime) {
- unsigned int primeid;
+ unsigned primeid;
errno = 0;
primeid = strtoul(prime, NULL, 0);
if (errno == 0)
@@ -362,9 +372,12 @@ vl_screen_create(Display *display, int screen)
}
#endif
- connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType);
+ connect_cookie = xcb_dri2_connect_unchecked(scrn->conn,
+ get_xcb_screen(s, screen)->root,
+ driverType);
connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
- if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0)
+ if (connect == NULL ||
+ connect->driver_name_length + connect->device_name_length == 0)
goto free_connect;
device_name_length = xcb_dri2_connect_device_name_length(connect);
@@ -381,22 +394,26 @@ vl_screen_create(Display *display, int screen)
if (drmGetMagic(fd, &magic))
goto free_connect;
- authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic);
+ authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn,
+ get_xcb_screen(s, screen)->root,
+ magic);
authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL);
if (authenticate == NULL || !authenticate->authenticated)
goto free_authenticate;
-#if GALLIUM_STATIC_TARGETS
- scrn->base.pscreen = dd_create_screen(fd);
-#else
- if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
- scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR);
-#endif // GALLIUM_STATIC_TARGETS
+ if (pipe_loader_drm_probe_fd(&scrn->base.dev, dup(fd)))
+ scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
if (!scrn->base.pscreen)
goto release_pipe;
+ scrn->base.destroy = vl_dri2_screen_destroy;
+ scrn->base.texture_from_drawable = vl_dri2_screen_texture_from_drawable;
+ scrn->base.get_dirty_area = vl_dri2_screen_get_dirty_area;
+ scrn->base.get_timestamp = vl_dri2_screen_get_timestamp;
+ scrn->base.set_next_timestamp = vl_dri2_screen_set_next_timestamp;
+ scrn->base.get_private = vl_dri2_screen_get_private;
scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
vl_compositor_reset_dirty_area(&scrn->dirty_areas[0]);
vl_compositor_reset_dirty_area(&scrn->dirty_areas[1]);
@@ -409,10 +426,8 @@ vl_screen_create(Display *display, int screen)
return &scrn->base;
release_pipe:
-#if !GALLIUM_STATIC_TARGETS
if (scrn->base.dev)
pipe_loader_release(&scrn->base.dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
free_authenticate:
free(authenticate);
free_connect:
@@ -426,9 +441,10 @@ free_screen:
return NULL;
}
-void vl_screen_destroy(struct vl_screen *vscreen)
+static void
+vl_dri2_screen_destroy(struct vl_screen *vscreen)
{
- struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+ struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
assert(vscreen);
@@ -440,8 +456,6 @@ void vl_screen_destroy(struct vl_screen *vscreen)
vl_dri2_destroy_drawable(scrn);
scrn->base.pscreen->destroy(scrn->base.pscreen);
-#if !GALLIUM_STATIC_TARGETS
pipe_loader_release(&scrn->base.dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
FREE(scrn);
}
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index 1167fcf6a90..f993e2c7727 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -34,7 +34,10 @@
#include "util/u_memory.h"
#include "vl/vl_winsys.h"
-struct vl_screen*
+static void
+vl_drm_screen_destroy(struct vl_screen *vscreen);
+
+struct vl_screen *
vl_drm_screen_create(int fd)
{
struct vl_screen *vscreen;
@@ -43,35 +46,34 @@ vl_drm_screen_create(int fd)
if (!vscreen)
return NULL;
-#if GALLIUM_STATIC_TARGETS
- vscreen->pscreen = dd_create_screen(fd);
-#else
- if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) {
- vscreen->pscreen =
- pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR);
- if (!vscreen->pscreen)
- pipe_loader_release(&vscreen->dev, 1);
- }
-#endif
+ if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd)))
+ vscreen->pscreen = pipe_loader_create_screen(vscreen->dev);
- if (!vscreen->pscreen) {
- FREE(vscreen);
- return NULL;
- }
+ if (!vscreen->pscreen)
+ goto error;
+ vscreen->destroy = vl_drm_screen_destroy;
+ vscreen->texture_from_drawable = NULL;
+ vscreen->get_dirty_area = NULL;
+ vscreen->get_timestamp = NULL;
+ vscreen->set_next_timestamp = NULL;
+ vscreen->get_private = NULL;
return vscreen;
+
+error:
+ if (vscreen->dev)
+ pipe_loader_release(&vscreen->dev, 1);
+
+ FREE(vscreen);
+ return NULL;
}
-void
+static void
vl_drm_screen_destroy(struct vl_screen *vscreen)
{
assert(vscreen);
vscreen->pscreen->destroy(vscreen->pscreen);
-
-#if !GALLIUM_STATIC_TARGETS
pipe_loader_release(&vscreen->dev, 1);
-#endif
-
FREE(vscreen);
}
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index ef235734755..77f708f449c 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index b5e1ddadde0..a6940dfefea 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -111,10 +111,14 @@ enum a3xx_vtx_fmt {
VFMT_8_8_SNORM = 53,
VFMT_8_8_8_SNORM = 54,
VFMT_8_8_8_8_SNORM = 55,
- VFMT_10_10_10_2_UINT = 60,
- VFMT_10_10_10_2_UNORM = 61,
- VFMT_10_10_10_2_SINT = 62,
- VFMT_10_10_10_2_SNORM = 63,
+ VFMT_10_10_10_2_UINT = 56,
+ VFMT_10_10_10_2_UNORM = 57,
+ VFMT_10_10_10_2_SINT = 58,
+ VFMT_10_10_10_2_SNORM = 59,
+ VFMT_2_10_10_10_UINT = 60,
+ VFMT_2_10_10_10_UNORM = 61,
+ VFMT_2_10_10_10_SINT = 62,
+ VFMT_2_10_10_10_SNORM = 63,
};
enum a3xx_tex_fmt {
@@ -138,10 +142,12 @@ enum a3xx_tex_fmt {
TFMT_DXT1 = 36,
TFMT_DXT3 = 37,
TFMT_DXT5 = 38,
+ TFMT_2_10_10_10_UNORM = 40,
TFMT_10_10_10_2_UNORM = 41,
TFMT_9_9_9_E5_FLOAT = 42,
TFMT_11_11_10_FLOAT = 43,
TFMT_A8_UNORM = 44,
+ TFMT_L8_UNORM = 45,
TFMT_L8_A8_UNORM = 47,
TFMT_8_UNORM = 48,
TFMT_8_8_UNORM = 49,
@@ -183,6 +189,8 @@ enum a3xx_tex_fmt {
TFMT_32_SINT = 92,
TFMT_32_32_SINT = 93,
TFMT_32_32_32_32_SINT = 95,
+ TFMT_2_10_10_10_UINT = 96,
+ TFMT_10_10_10_2_UINT = 97,
TFMT_ETC2_RG11_SNORM = 112,
TFMT_ETC2_RG11_UNORM = 113,
TFMT_ETC2_R11_SNORM = 114,
@@ -215,6 +223,9 @@ enum a3xx_color_fmt {
RB_R8_UINT = 14,
RB_R8_SINT = 15,
RB_R10G10B10A2_UNORM = 16,
+ RB_A2R10G10B10_UNORM = 17,
+ RB_R10G10B10A2_UINT = 18,
+ RB_A2R10G10B10_UINT = 19,
RB_A8_UNORM = 20,
RB_R8_UNORM = 21,
RB_R16_FLOAT = 24,
@@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select {
SP_ALU_ACTIVE_CYCLES = 29,
};
-enum a3xx_rop_code {
- ROP_CLEAR = 0,
- ROP_NOR = 1,
- ROP_AND_INVERTED = 2,
- ROP_COPY_INVERTED = 3,
- ROP_AND_REVERSE = 4,
- ROP_INVERT = 5,
- ROP_XOR = 6,
- ROP_NAND = 7,
- ROP_AND = 8,
- ROP_EQUIV = 9,
- ROP_NOOP = 10,
- ROP_OR_INVERTED = 11,
- ROP_COPY = 12,
- ROP_OR_REVERSE = 13,
- ROP_OR = 14,
- ROP_SET = 15,
-};
-
enum a3xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
@@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
}
#define REG_A3XX_VFD_CONTROL_1 0x00002241
-#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f
#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
}
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK;
+}
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8
+static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK;
+}
#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 3906c9b996e..b8a31d84b3f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
+ fd3_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 8f9c8b0623c..24afbc9e956 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = fd_sampler_first_level(&view->base);
- unsigned end = fd_sampler_last_level(&view->base);;
+ if (rsc && rsc->base.b.target == PIPE_BUFFER) {
+ OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element *
+ util_format_get_blocksize(view->base.format), 0, 0);
+ j = 1;
+ } else {
+ unsigned start = fd_sampler_first_level(&view->base);
+			unsigned end = fd_sampler_last_level(&view->base);
- for (j = 0; j < (end - start + 1); j++) {
- struct fd_resource_slice *slice =
+ for (j = 0; j < (end - start + 1); j++) {
+ struct fd_resource_slice *slice =
fd_resource_slice(rsc, j + start);
- OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ }
}
/* pad the remaining entries w/ null: */
@@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+ A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 857d156c869..52ea9444517 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
@@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
_T(DXT3_SRGBA, DXT3, NONE, WZYX),
_T(DXT5_RGBA, DXT5, NONE, WZYX),
_T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ /* faked */
+ _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
};
enum a3xx_vtx_fmt
@@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format)
{
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
+ else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH_1_BYTE;
case 16: return TFETCH_2_BYTE;
@@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format)
}
}
+unsigned
+fd3_pipe2nblocksx(enum pipe_format format, unsigned width)
+{
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ return util_format_get_nblocksx(format, width);
+}
+
/* we need to special case a bit the depth/stencil restore, because we are
* using the texture sampler to blit into the depth/stencil buffer, *not*
* into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 05c5ea3d247..48c503e9a82 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
enum pipe_format fd3_gmem_restore_format(enum pipe_format format);
enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
+unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 2d6ecb2c050..99ae99ea0c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl;
uint32_t sz2 = 0;
if (!so)
@@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->texconst0 =
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
- A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
- so->texconst1 =
+ if (prsc->target == PIPE_BUFFER) {
+ lvl = 0;
+ so->texconst1 =
+ A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+ A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element -
+ cso->u.buf.first_element + 1) |
+ A3XX_TEX_CONST_1_HEIGHT(1);
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+
+ so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ }
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 9f970365464..a450379e98d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -47,11 +47,13 @@ enum a4xx_color_fmt {
RB4_R8_UNORM = 2,
RB4_R4G4B4A4_UNORM = 8,
RB4_R5G5B5A1_UNORM = 10,
- RB4_R5G6R5_UNORM = 14,
+ RB4_R5G6B5_UNORM = 14,
RB4_R8G8_UNORM = 15,
RB4_R8G8_SNORM = 16,
RB4_R8G8_UINT = 17,
RB4_R8G8_SINT = 18,
+ RB4_R16_UNORM = 19,
+ RB4_R16_SNORM = 20,
RB4_R16_FLOAT = 21,
RB4_R16_UINT = 22,
RB4_R16_SINT = 23,
@@ -63,12 +65,16 @@ enum a4xx_color_fmt {
RB4_R10G10B10A2_UNORM = 31,
RB4_R10G10B10A2_UINT = 34,
RB4_R11G11B10_FLOAT = 39,
+ RB4_R16G16_UNORM = 40,
+ RB4_R16G16_SNORM = 41,
RB4_R16G16_FLOAT = 42,
RB4_R16G16_UINT = 43,
RB4_R16G16_SINT = 44,
RB4_R32_FLOAT = 45,
RB4_R32_UINT = 46,
RB4_R32_SINT = 47,
+ RB4_R16G16B16A16_UNORM = 52,
+ RB4_R16G16B16A16_SNORM = 53,
RB4_R16G16B16A16_FLOAT = 54,
RB4_R16G16B16A16_UINT = 55,
RB4_R16G16B16A16_SINT = 56,
@@ -106,6 +112,7 @@ enum a4xx_vtx_fmt {
VFMT4_32_32_FIXED = 10,
VFMT4_32_32_32_FIXED = 11,
VFMT4_32_32_32_32_FIXED = 12,
+ VFMT4_11_11_10_FLOAT = 13,
VFMT4_16_SINT = 16,
VFMT4_16_16_SINT = 17,
VFMT4_16_16_16_SINT = 18,
@@ -146,18 +153,19 @@ enum a4xx_vtx_fmt {
VFMT4_8_8_SNORM = 53,
VFMT4_8_8_8_SNORM = 54,
VFMT4_8_8_8_8_SNORM = 55,
- VFMT4_10_10_10_2_UINT = 60,
- VFMT4_10_10_10_2_UNORM = 61,
- VFMT4_10_10_10_2_SINT = 62,
- VFMT4_10_10_10_2_SNORM = 63,
+ VFMT4_10_10_10_2_UINT = 56,
+ VFMT4_10_10_10_2_UNORM = 57,
+ VFMT4_10_10_10_2_SINT = 58,
+ VFMT4_10_10_10_2_SNORM = 59,
};
enum a4xx_tex_fmt {
TFMT4_5_6_5_UNORM = 11,
- TFMT4_5_5_5_1_UNORM = 10,
+ TFMT4_5_5_5_1_UNORM = 9,
TFMT4_4_4_4_4_UNORM = 8,
TFMT4_X8Z24_UNORM = 71,
TFMT4_10_10_10_2_UNORM = 33,
+ TFMT4_10_10_10_2_UINT = 34,
TFMT4_A8_UNORM = 3,
TFMT4_L8_A8_UNORM = 13,
TFMT4_8_UNORM = 4,
@@ -172,6 +180,12 @@ enum a4xx_tex_fmt {
TFMT4_8_SINT = 7,
TFMT4_8_8_SINT = 17,
TFMT4_8_8_8_8_SINT = 31,
+ TFMT4_16_UNORM = 18,
+ TFMT4_16_16_UNORM = 38,
+ TFMT4_16_16_16_16_UNORM = 51,
+ TFMT4_16_SNORM = 19,
+ TFMT4_16_16_SNORM = 39,
+ TFMT4_16_16_16_16_SNORM = 52,
TFMT4_16_UINT = 21,
TFMT4_16_16_UINT = 41,
TFMT4_16_16_16_16_UINT = 54,
@@ -190,8 +204,21 @@ enum a4xx_tex_fmt {
TFMT4_32_FLOAT = 43,
TFMT4_32_32_FLOAT = 56,
TFMT4_32_32_32_32_FLOAT = 63,
+ TFMT4_32_32_32_FLOAT = 59,
+ TFMT4_32_32_32_UINT = 60,
+ TFMT4_32_32_32_SINT = 61,
TFMT4_9_9_9_E5_FLOAT = 32,
TFMT4_11_11_10_FLOAT = 37,
+ TFMT4_DXT1 = 86,
+ TFMT4_DXT3 = 87,
+ TFMT4_DXT5 = 88,
+ TFMT4_RGTC1_UNORM = 90,
+ TFMT4_RGTC1_SNORM = 91,
+ TFMT4_RGTC2_UNORM = 94,
+ TFMT4_RGTC2_SNORM = 95,
+ TFMT4_BPTC_UFLOAT = 97,
+ TFMT4_BPTC_FLOAT = 98,
+ TFMT4_BPTC = 99,
TFMT4_ATC_RGB = 100,
TFMT4_ATC_RGBA_EXPLICIT = 101,
TFMT4_ATC_RGBA_INTERPOLATED = 102,
@@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4
#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020
-#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400
-#define A4XX_RB_MRT_CONTROL_B11 0x00000800
+#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8
+static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
+{
+ return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
@@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
}
-#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100
+#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16
static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
@@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
+#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001
@@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
-#define REG_A4XX_UNKNOWN_21C5 0x000021c5
+#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040
#define REG_A4XX_PC_RESTART_INDEX 0x000021c6
@@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
{
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
}
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+ return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
#define REG_A4XX_TEX_SAMP_1 0x00000001
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
@@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val
{
return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
}
+#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010
#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020
#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040
#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00
@@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
}
-#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000
+#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000
#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15
static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
{
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index d5e823ef69d..f19702280e0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -27,6 +27,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
@@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
-// enum a3xx_rop_code rop = ROP_COPY;
+ enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
-// rop = cso->logicop_func; /* maps 1:1 */
+ rop = cso->logicop_func; /* maps 1:1 */
switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
@@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx,
else
rt = &cso->rt[0];
- so->rb_mrt[i].blend_control =
+ so->rb_mrt[i].blend_control_rgb =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
+
+ so->rb_mrt[i].blend_control_alpha =
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+ so->rb_mrt[i].blend_control_no_alpha_rgb =
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
+
so->rb_mrt[i].control =
- 0xc00 | /* XXX ROP_CODE ?? */
+ A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) {
@@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx,
mrt_blend |= (1 << i);
}
- if (reads_dest)
+ if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ mrt_blend |= (1 << i);
+ }
if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
- so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+ so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
+ COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 7620d00a625..6230fa7a50e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -39,7 +39,12 @@ struct fd4_blend_stateobj {
struct {
uint32_t control;
uint32_t buf_info;
- uint32_t blend_control;
+ /* Blend control bits for color if there is an alpha channel */
+ uint32_t blend_control_rgb;
+ /* Blend control bits for color if there is no alpha channel */
+ uint32_t blend_control_no_alpha_rgb;
+ /* Blend control bits for alpha channel */
+ uint32_t blend_control_alpha;
} rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 7bd5163529a..8cbe68d5790 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
return;
@@ -64,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer->point_size_per_vertex &&
+ fd4_emit_get_vp(emit)->writes_psize &&
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
fd4_draw_emit(ctx, ring,
+ primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}
@@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index b89a30a7c4b..a6c56404a8a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size)
}
static inline void
fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
@@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- /* points + psize -> spritelist: */
- if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
- (info->mode == PIPE_PRIM_POINTS))
- primtype = DI_PT_POINTLIST_PSIZE;
-
fd4_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 26b58718cd8..f220fc7ac1f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- unsigned start = fd_sampler_first_level(&view->base);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
@@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, view->texconst3);
if (view->base.texture) {
struct fd_resource *rsc = fd_resource(view->base.texture);
- uint32_t offset = fd_resource_offset(rsc, start, 0);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
} else {
OUT_RING(ring, 0x00000000);
}
@@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
OUT_RING(ring, 0x00000000);
@@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
-
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+ A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
0xa0000 | /* XXX */
@@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
}
- if (dirty & FD_DIRTY_ZSA) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+ if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+ rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, zsa->rb_alpha_control);
+ OUT_RING(ring, rb_alpha_control);
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, zsa->rb_stencil_control);
@@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
*/
if (emit->info) {
const struct pipe_draw_info *info = emit->info;
- uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
- ->pc_prim_vtx_cntl;
+ struct fd4_rasterizer_stateobj *rast =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+ uint32_t val = rast->pc_prim_vtx_cntl;
if (info->indexed && info->primitive_restart)
val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
@@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
OUT_RING(ring, val);
- OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
+ OUT_RING(ring, rast->pc_prim_vtx_cntl2);
}
if (dirty & FD_DIRTY_SCISSOR) {
@@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG) {
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
@@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t i;
for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format format = pipe_surface_format(
+ ctx->framebuffer.cbufs[i]);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+ uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
+
+ if (is_int) {
+ control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (has_alpha) {
+ blend_control |= blend->rb_mrt[i].blend_control_rgb;
+ } else {
+ blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
+ control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].control);
+ OUT_RING(ring, control);
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ OUT_RING(ring, blend_control);
}
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
@@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
}
- if (dirty & FD_DIRTY_BLEND_COLOR) {
+ if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ float factor = 65535.0;
+ int i;
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
+ const struct util_format_description *desc =
+ util_format_description(format);
+ int j;
+
+ if (desc->is_mixed)
+ continue;
+
+ j = util_format_get_first_non_void_channel(format);
+ if (j == -1)
+ continue;
+
+ if (desc->channel[j].size > 8 || !desc->channel[j].normalized ||
+ desc->channel[j].pure_integer)
+ continue;
+
+ /* Just use the first unorm8/snorm8 render buffer. Can't keep
+ * everyone happy.
+ */
+ if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED)
+ factor = 32767.0;
+ break;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
- OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) |
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) |
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) |
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) |
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 847d4fb6d63..c240745cec1 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(S8_UINT, 8_UINT, R8_UNORM, WZYX),
/* 16-bit */
- V_(R16_UNORM, 16_UNORM, NONE, WZYX),
- V_(R16_SNORM, 16_SNORM, NONE, WZYX),
- VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
- VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
- V_(R16_USCALED, 16_UINT, NONE, WZYX),
- V_(R16_SSCALED, 16_UINT, NONE, WZYX),
- VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
-
- _T(A16_UINT, 16_UINT, NONE, WZYX),
- _T(A16_SINT, 16_SINT, NONE, WZYX),
- _T(L16_UINT, 16_UINT, NONE, WZYX),
- _T(L16_SINT, 16_SINT, NONE, WZYX),
- _T(I16_UINT, 16_UINT, NONE, WZYX),
- _T(I16_SINT, 16_SINT, NONE, WZYX),
+ VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX),
+ VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX),
+ VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
+ VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
+ V_(R16_USCALED, 16_UINT, NONE, WZYX),
+ V_(R16_SSCALED, 16_UINT, NONE, WZYX),
+ VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX),
+
+ _T(A16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(A16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(A16_UINT, 16_UINT, NONE, WZYX),
+ _T(A16_SINT, 16_SINT, NONE, WZYX),
+ _T(L16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(L16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(L16_UINT, 16_UINT, NONE, WZYX),
+ _T(L16_SINT, 16_SINT, NONE, WZYX),
+ _T(I16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(I16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(I16_UINT, 16_UINT, NONE, WZYX),
+ _T(I16_SINT, 16_SINT, NONE, WZYX),
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
@@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L8A8_UINT, 8_8_UINT, NONE, WZYX),
_T(L8A8_SINT, 8_8_SINT, NONE, WZYX),
+ _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ),
_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
@@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(I32_UINT, 32_UINT, NONE, WZYX),
_T(I32_SINT, 32_SINT, NONE, WZYX),
- V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX),
- V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX),
- VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
- VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
- V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
- V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
- VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
+ VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX),
+ VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX),
+ VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
+ VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
+ V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
+ V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
+ VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX),
- _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
- _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
+ _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX),
+ _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX),
+ _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
+ _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
- V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
+ VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX),
+ VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
- _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
+ VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),
/* 64-bit */
- V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX),
- V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX),
+ VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+ VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
@@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L32A32_SINT, 32_32_SINT, NONE, WZYX),
/* 96-bit */
- V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
- V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
+ VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
+ VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX),
- V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
+ VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX),
/* 128-bit */
@@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
_T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX),
+
+ /* compressed */
+ _T(ETC1_RGB8, ETC1, NONE, WZYX),
+ _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
+ _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
+ _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
+ _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+ _T(DXT1_RGB, DXT1, NONE, WZYX),
+ _T(DXT1_SRGB, DXT1, NONE, WZYX),
+ _T(DXT1_RGBA, DXT1, NONE, WZYX),
+ _T(DXT1_SRGBA, DXT1, NONE, WZYX),
+ _T(DXT3_RGBA, DXT3, NONE, WZYX),
+ _T(DXT3_SRGBA, DXT3, NONE, WZYX),
+ _T(DXT5_RGBA, DXT5, NONE, WZYX),
+ _T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX),
+ _T(BPTC_SRGBA, BPTC, NONE, WZYX),
+ _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX),
+ _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
+
+ _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+
+ _T(ASTC_4x4, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12, ASTC_12x12, NONE, WZYX),
+
+ _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX),
};
/* convert pipe format to vertex buffer format: */
@@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format)
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
- switch (util_format_get_blocksizebits(format)) {
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
+ return TFETCH4_16_BYTE;
+
+ switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
case 32: return TFETCH4_4_BYTE;
case 64: return TFETCH4_8_BYTE;
+ case 96: return TFETCH4_1_BYTE; /* Does this matter? */
case 128: return TFETCH4_16_BYTE;
default:
debug_printf("Unknown block size for format %s: %d\n",
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 3f8bbf3a124..221608127b4 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e3d5dabab4c..3df13543148 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
- /* adjust regids for alpha output formats. there is no alpha render
- * format, so it's just treated like red
- */
- for (i = 0; i < nr; i++)
- if (util_format_is_alpha(pipe_surface_format(bufs[i])))
- color_regid[i] += 3;
-
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index dc7e98b149d..7456c63febe 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+ so->pc_prim_vtx_cntl2 =
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
@@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+ if (!cso->depth_clip)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ if (cso->clip_halfz)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 64e81a9983b..b56a04da6a8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj {
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
+ uint32_t pc_prim_vtx_cntl2;
};
static inline struct fd4_rasterizer_stateobj *
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index d8ea414f300..b2a69cca56c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen,
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (target == PIPE_BUFFER ||
+ util_format_get_blocksize(format) != 12) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index dbff5a738fd..0eba75577b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx,
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+ COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
@@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl, layers;
+ uint32_t sz2 = 0;
if (!so)
return NULL;
@@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base.context = pctx;
so->texconst0 =
- A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
- A4XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
- so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
- A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.last_element -
+ cso->u.buf.first_element + 1;
+ lvl = 0;
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(elements) |
+ A4XX_TEX_CONST_1_HEIGHT(1);
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+ so->offset = cso->u.buf.first_element *
+ util_format_get_blocksize(cso->format);
+ } else {
+ unsigned miplevels;
- switch (prsc->target) {
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(
+ util_format_get_nblocksx(
+ cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+
+ switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size) |
+ A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
+ A4XX_TEX_CONST_3_DEPTH(layers / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
- A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0);
+ while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+ sz2 = rsc->slices[++lvl].size0;
+ so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2);
break;
default:
so->texconst3 = 0x00000000;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 31955770a85..6ca34ade60d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
struct fd4_pipe_sampler_view {
struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
+ uint32_t offset;
};
static inline struct fd4_pipe_sampler_view *
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index ca3d2ac3fca..0e0f0e65e9b 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode {
RB_COPY_DEPTH_STENCIL = 5,
};
+enum a3xx_rop_code {
+ ROP_CLEAR = 0,
+ ROP_NOR = 1,
+ ROP_AND_INVERTED = 2,
+ ROP_COPY_INVERTED = 3,
+ ROP_AND_REVERSE = 4,
+ ROP_INVERT = 5,
+ ROP_XOR = 6,
+ ROP_NAND = 7,
+ ROP_AND = 8,
+ ROP_EQUIV = 9,
+ ROP_NOOP = 10,
+ ROP_OR_INVERTED = 11,
+ ROP_COPY = 12,
+ ROP_OR_REVERSE = 13,
+ ROP_OR = 14,
+ ROP_SET = 15,
+};
+
enum a3xx_render_mode {
RB_RENDERING_PASS = 0,
RB_TILING_PASS = 1,
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index f095e3061b2..4aabc086607 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 61c4c6d6e24..571c8142bf7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -359,6 +359,10 @@ struct fd_context {
struct fd_streamout_stateobj streamout;
struct pipe_clip_state ucp;
+ struct pipe_query *cond_query;
+ bool cond_cond; /* inverted rendering condition */
+ uint cond_mode;
+
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 7bf3343f43a..bf803cc77bc 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
return;
}
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
if (ctx->streamout.num_targets > 0)
@@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
unsigned cleared_buffers;
int i;
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* for bookkeeping about which buffers have been cleared (and thus
* can fully or partially skip mem2gmem) we need to ignore buffers
* that have already had a draw, in case apps do silly things like
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index db2683c9b6f..b87e8250719 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
+static void
+fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->cond_query = pq;
+ ctx->cond_cond = condition;
+ ctx->cond_mode = mode;
+}
+
static int
fd_get_driver_query_info(struct pipe_screen *pscreen,
unsigned index, struct pipe_driver_query_info *info)
@@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx)
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
+ pctx->render_condition = fd_render_condition;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 98de0969cab..63ca9e30620 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -27,6 +27,7 @@
*/
#include "util/u_format.h"
+#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
util_range_set_empty(&rsc->valid_buffer_range);
}
-/* Currently this is only used for flushing Z32_S8 texture transfers, but
- * eventually it should handle everything.
- */
+static unsigned
+fd_resource_layer_offset(struct fd_resource *rsc,
+ struct fd_resource_slice *slice,
+ unsigned layer)
+{
+ if (rsc->layer_first)
+ return layer * rsc->layer_size;
+ else
+ return layer * slice->size0;
+}
+
static void
-fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
struct fd_resource *rsc = fd_resource(trans->base.resource);
struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
@@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
enum pipe_format format = trans->base.resource->format;
float *depth = fd_bo_map(rsc->bo) + slice->offset +
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
- assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
- format == PIPE_FORMAT_X32_S8X24_UINT);
-
if (format != PIPE_FORMAT_X32_S8X24_UINT)
util_format_z32_float_s8x24_uint_unpack_z_float(
depth, slice->pitch * 4,
@@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
box->width, box->height);
}
+static void
+fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ struct fd_resource *rsc = fd_resource(trans->base.resource);
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+ enum pipe_format format = trans->base.resource->format;
+
+ uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
+ ((trans->base.box.y + box->y) * slice->pitch +
+ trans->base.box.x + box->x) * rsc->cpp;
+
+ uint8_t *source = trans->staging +
+ util_format_get_nblocksy(format, box->y) * trans->base.stride +
+ util_format_get_stride(format, box->x);
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_unpack_rgba_8unorm(
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_unpack_rgba_8unorm(
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ default:
+ assert(!"Unexpected format\n");
+ break;
+ }
+}
+
+static void
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ enum pipe_format format = trans->base.resource->format;
+
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ fd_resource_flush_z32s8(trans, box);
+ break;
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ fd_resource_flush_rgtc(trans, box);
+ break;
+ default:
+ assert(!"Unexpected staging transfer type");
+ break;
+ }
+}
+
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
@@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
return NULL;
}
- if (rsc->layer_first) {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * rsc->layer_size;
- } else {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * slice->size0;
- }
+ offset = slice->offset +
+ box->y / util_format_get_blockheight(format) * ptrans->stride +
+ box->x / util_format_get_blockwidth(format) * rsc->cpp +
+ fd_resource_layer_offset(rsc, slice, box->z);
if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+ assert(trans->base.box.depth == 1);
+
trans->base.stride = trans->base.box.width * rsc->cpp * 2;
trans->staging = malloc(trans->base.stride * trans->base.box.height);
if (!trans->staging)
@@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx,
goto fail;
float *depth = (float *)(buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
box->y * slice->pitch * 4 + box->x * 4);
uint8_t *stencil = sbuf + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
box->y * sslice->pitch + box->x;
if (format != PIPE_FORMAT_X32_S8X24_UINT)
@@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx,
buf = trans->staging;
offset = 0;
+ } else if (rsc->internal_format != format &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ assert(trans->base.box.depth == 1);
+
+ trans->base.stride = util_format_get_stride(
+ format, trans->base.box.width);
+ trans->staging = malloc(
+ util_format_get_2d_size(format, trans->base.stride,
+ trans->base.box.height));
+ if (!trans->staging)
+ goto fail;
+
+ /* if we're not discarding the whole range (or resource), we must copy
+ * the real data in.
+ */
+ if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+ PIPE_TRANSFER_DISCARD_RANGE))) {
+ uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
+ box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ default:
+ assert(!"Unexpected format");
+ break;
+ }
+ }
+
+ buf = trans->staging;
+ offset = 0;
}
*pptrans = ptrans;
@@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = {
};
static uint32_t
-setup_slices(struct fd_resource *rsc, uint32_t alignment)
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
struct pipe_resource *prsc = &rsc->base.b;
+ enum util_format_layout layout = util_format_description(format)->layout;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
@@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
uint32_t blocks;
- slice->pitch = width = align(width, 32);
+ if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+ slice->pitch = width =
+ util_align_npot(width, 32 * util_format_get_blockwidth(format));
+ else
+ slice->pitch = width = align(width, 32);
slice->offset = size;
- blocks = util_format_get_nblocks(prsc->format, width, height);
+ blocks = util_format_get_nblocks(format, width, height);
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen,
{
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct pipe_resource *prsc = &rsc->base.b;
- uint32_t size;
+ enum pipe_format format = tmpl->format;
+ uint32_t size, alignment;
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
- tmpl->target, util_format_name(tmpl->format),
+ tmpl->target, util_format_name(format),
tmpl->width0, tmpl->height0, tmpl->depth0,
tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
tmpl->usage, tmpl->bind, tmpl->flags);
@@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen,
util_range_init(&rsc->valid_buffer_range);
rsc->base.vtbl = &fd_resource_vtbl;
- if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
- else
- rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
+ else if (fd_screen(pscreen)->gpu_id < 400 &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ rsc->internal_format = format;
+ rsc->cpp = util_format_get_blocksize(format);
assert(rsc->cpp);
+ alignment = slice_alignment(pscreen, tmpl);
if (is_a4xx(fd_screen(pscreen))) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
@@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen,
break;
default:
rsc->layer_first = true;
+ alignment = 1;
break;
}
}
- size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
+ size = setup_slices(rsc, alignment, format);
if (rsc->layer_first) {
rsc->layer_size = align(size, 4096);
@@ -548,7 +680,7 @@ fail:
return NULL;
}
-static void fd_blitter_pipe_begin(struct fd_context *ctx);
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
static void fd_blitter_pipe_end(struct fd_context *ctx);
/**
@@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx,
if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
return false;
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, false);
util_blitter_copy_texture(ctx->blitter,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
@@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx,
src, src_level, src_box);
}
+bool
+fd_render_condition_check(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (!ctx->cond_query)
+ return true;
+
+ union pipe_query_result res = { 0 };
+ bool wait =
+ ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
+ ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+ if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
+ return (bool)res.u64 != ctx->cond_cond;
+
+ return true;
+}
+
/**
* Optimal hardware path for blitting pixels.
* Scaling, format conversion, up- and downsampling (resolve) are allowed.
@@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
+ if (info.render_condition_enable && !fd_render_condition_check(pctx))
+ return;
+
if (util_try_blit_via_copy_region(pctx, &info)) {
return; /* done */
}
@@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, info.render_condition_enable);
util_blitter_blit(ctx->blitter, &info);
fd_blitter_pipe_end(ctx);
}
static void
-fd_blitter_pipe_begin(struct fd_context *ctx)
+fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond)
{
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
@@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
(void **)ctx->fragtex.samplers);
util_blitter_save_fragment_sampler_views(ctx->blitter,
ctx->fragtex.num_textures, ctx->fragtex.textures);
+ if (!render_cond)
+ util_blitter_save_render_condition(ctx->blitter,
+ ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 7549becaa1f..9a9b0d08244 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -73,6 +73,7 @@ struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
uint32_t cpp;
+ enum pipe_format internal_format;
bool layer_first; /* see above description */
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
@@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
void fd_resource_screen_init(struct pipe_screen *pscreen);
void fd_resource_context_init(struct pipe_context *pctx);
+bool fd_render_condition_check(struct pipe_context *pctx);
+
#endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 56d1834ef9c..5bbe4016a2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
@@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- /* ignoring first/last_element.. but I guess that should be
- * easy to add..
- */
+ if (is_a3xx(screen)) return 16;
+ if (is_a4xx(screen)) return 32;
return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* I think 32k on a4xx.. and we could possibly emulate more
- * by pretending 2d/rect textures and splitting high bits
- * of index into 2nd dimension..
+ /* We could possibly emulate more by pretending 2d/rect textures and
+ * splitting high bits of index into 2nd dimension..
*/
- return 16383;
-
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- return is_a3xx(screen);
+ if (is_a3xx(screen)) return 8192;
+ if (is_a4xx(screen)) return 16384;
+ return 0;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
return is_a4xx(screen);
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
- return is_ir3(screen) ? 130 : 120;
+ return is_ir3(screen) ? 140 : 120;
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
- case PIPE_CAP_FAKE_SW_MSAA:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index 04e4643b4c9..f5611abaec8 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
continue;
const struct util_format_channel_description *chan =
- &desc->channel[desc->swizzle[j]];
- int size = chan->size;
-
- /* The Z16 texture format we use seems to look in the
- * 32-bit border color slots
- */
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
- size = 32;
-
- /* Formats like R11G11B10 or RGB9_E5 don't specify
- * per-channel sizes properly.
- */
- if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
- size = 16;
-
- if (chan->pure_integer && size > 16)
- bcolor32[desc->swizzle[j] + 4] =
- sampler->border_color.i[j];
- else if (size > 16)
- bcolor32[desc->swizzle[j]] =
- fui(sampler->border_color.f[j]);
- else if (chan->pure_integer)
- bcolor[desc->swizzle[j] + 8] =
- sampler->border_color.i[j];
- else
+ &desc->channel[desc->swizzle[j]];
+ if (chan->pure_integer) {
+ bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
+ bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
+ } else {
+ bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
bcolor[desc->swizzle[j]] =
- util_float_to_half(sampler->border_color.f[j]);
+ util_float_to_half(sampler->border_color.f[j]);
+ }
}
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73a3c6..156bb0be247 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
break;
+ case nir_op_bit_count:
+ dst[0] = ir3_CBITS_B(b, src[0], 0);
+ break;
+ case nir_op_ifind_msb: {
+ struct ir3_instruction *cmp;
+ dst[0] = ir3_CLZ_S(b, src[0], 0);
+ cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0);
+ cmp->cat2.condition = IR3_COND_GE;
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ cmp, 0, dst[0], 0);
+ break;
+ }
+ case nir_op_ufind_msb:
+ dst[0] = ir3_CLZ_B(b, src[0], 0);
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ src[0], 0, dst[0], 0);
+ break;
+ case nir_op_find_lsb:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ dst[0] = ir3_CLZ_B(b, dst[0], 0);
+ break;
+ case nir_op_bitfield_reverse:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ break;
+
default:
compile_error(ctx, "Unhandled ALU op: %s\n",
nir_op_infos[alu->op].name);
@@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
unreachable("bad sampler_dim");
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
flags |= IR3_INSTR_S;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
flags |= IR3_INSTR_A;
*flagsp = flags;
@@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_texop_txl: opc = OPC_SAML; break;
case nir_texop_txd: opc = OPC_SAMGQ; break;
case nir_texop_txf: opc = OPC_ISAML; break;
+ case nir_texop_lod: opc = OPC_GETLOD; break;
case nir_texop_txf_ms:
case nir_texop_txs:
- case nir_texop_lod:
case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
+ case nir_texop_samples_identical:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
@@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
src0[nsrc0++] = create_immed(b, fui(0.5));
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
src0[nsrc0++] = compare;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
src0[nsrc0++] = coord[coords];
if (has_proj) {
@@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_type_int:
type = TYPE_S32;
break;
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
type = TYPE_U32;
break;
@@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
unreachable("bad dest_type");
}
+ if (opc == OPC_GETLOD)
+ type = TYPE_U32;
+
sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
flags, tex->sampler_index, tex->sampler_index,
create_collect(b, src0, nsrc0),
create_collect(b, src1, nsrc1));
split_dest(b, dst, sam, 4);
+
+ /* GETLOD returns results in 4.8 fixed point */
+ if (opc == OPC_GETLOD) {
+ struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256));
+
+ compile_assert(ctx, tex->dest_type == nir_type_float);
+ for (i = 0; i < 2; i++) {
+ dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0,
+ factor, 0);
+ }
+ }
}
static void
@@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
+ case nir_texop_samples_identical:
+ unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 7e2c27d9765..5d1cccb0daa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -166,7 +166,9 @@ struct ir3_shader_variant {
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
- /* vertices/inputs: */
+ /* attributes (VS) / varyings (FS):
+ * Note that sysval's should come *after* normal inputs.
+ */
unsigned inputs_count;
struct {
uint8_t slot;
@@ -229,7 +231,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
- struct pipe_context *pctx;
+ struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
const struct tgsi_token *tokens;
struct pipe_stream_output_info stream_output;
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 83f81135590..31a93659647 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -64,6 +64,8 @@ NV50_C_SOURCES := \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
nv50/nv50_blit.h \
+ nv50/nv50_compute.c \
+ nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
nv50/nv50_defs.xml.h \
@@ -76,6 +78,10 @@ NV50_C_SOURCES := \
nv50/nv50_query.h \
nv50/nv50_query_hw.c \
nv50/nv50_query_hw.h \
+ nv50/nv50_query_hw_metric.c \
+ nv50/nv50_query_hw_metric.h \
+ nv50/nv50_query_hw_sm.c \
+ nv50/nv50_query_hw_sm.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 2a13e1086a0..9f84de03a4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_PFETCH:
emitPFETCH(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_EMIT:
case OP_RESTART:
emitOUT(insn);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7859c8e79bd..41d2cc9167c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
Instruction *st;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
- st = new_Instruction(func, OP_STORE, ty);
- st->setSrc(0, slot);
- st->setSrc(1, lval);
lval->noSpill = 1;
+ if (ty != TYPE_B96) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ } else {
+ st = new_Instruction(func, OP_SPLIT, ty);
+ st->setSrc(0, lval);
+ for (int d = 0; d < lval->reg.size / 4; ++d)
+ st->setDef(d, new_LValue(func, FILE_GPR));
+
+ for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
+ Value *tmp = cloneShallow(func, slot);
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
+ s->setSrc(0, tmp);
+ s->setSrc(1, st->getDef(d));
+ defi->bb->insertAfter(defi, s);
+ }
+ }
} else {
st = new_Instruction(func, OP_CVT, ty);
st->setDef(0, slot);
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
Instruction *ld;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
lval->noSpill = 1;
- ld = new_Instruction(func, OP_LOAD, ty);
+ if (ty != TYPE_B96) {
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_MERGE, ty);
+ for (int d = 0; d < lval->reg.size / 4; ++d) {
+ Value *tmp = cloneShallow(func, slot);
+ LValue *val;
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
+ l->setDef(0, (val = new_LValue(func, FILE_GPR)));
+ l->setSrc(0, tmp);
+ usei->bb->insertBefore(usei, l);
+ ld->setSrc(d, val);
+ val->noSpill = 1;
+ }
+ ld->setDef(0, lval);
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+ }
} else {
ld = new_Instruction(func, OP_CVT, ty);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 68e69beb08f..1695553d793 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
buffer->domain = NOUVEAU_BO_GART;
- } else if (buffer->base.bind &
- (screen->vidmem_bindings & screen->sysmem_bindings)) {
+ } else if (buffer->base.bind == 0 || (buffer->base.bind &
+ (screen->vidmem_bindings & screen->sysmem_bindings))) {
switch (buffer->base.usage) {
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
@@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.bind & screen->sysmem_bindings)
buffer->domain = NOUVEAU_BO_GART;
}
+ /* There can be very special situations where we want non-gpu-mapped
+ * buffers, but never through this interface.
+ */
+ assert(buffer->domain);
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
if (ret == false)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
new file mode 100644
index 00000000000..6d23fd66945
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2012 Francisco Jerez
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_compute.xml.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+int
+nv50_screen_compute_setup(struct nv50_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_device *dev = screen->base.device;
+ struct nouveau_object *chan = screen->base.channel;
+ struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
+ unsigned obj_class;
+ int i, ret;
+
+ switch (dev->chipset & 0xf0) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ obj_class = NV50_COMPUTE_CLASS;
+ break;
+ case 0xa0:
+ switch (dev->chipset) {
+ case 0xa3:
+ case 0xa5:
+ case 0xa8:
+ obj_class = NVA3_COMPUTE_CLASS;
+ break;
+ default:
+ obj_class = NV50_COMPUTE_CLASS;
+ break;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret)
+ return ret;
+
+ BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->handle);
+
+ BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->stack_bo->offset);
+ PUSH_DATA (push, screen->stack_bo->offset);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+ PUSH_DATA (push, 4);
+
+ BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+ PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
+ BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+ PUSH_DATA (push, 0x100);
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+ PUSH_DATA (push, fifo->vram);
+
+ for (i = 0; i < 15; i++) {
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+ PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+ }
+
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
+ PUSH_DATA (push, ~0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
+ PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+ PUSH_DATA (push, 7);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+ PUSH_DATA (push, 7);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+ PUSH_DATA (push, 0x54);
+ BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+ PUSH_DATA (push, fifo->vram);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls_bo->offset + 65536);
+ PUSH_DATA (push, screen->tls_bo->offset + 65536);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+ PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
+
+ return 0;
+}
+
+static bool
+nv50_compute_validate_program(struct nv50_context *nv50)
+{
+ struct nv50_program *prog = nv50->compprog;
+
+ if (prog->mem)
+ return true;
+
+ if (!prog->translated) {
+ prog->translated = nv50_program_translate(
+ prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+ if (!prog->translated)
+ return false;
+ }
+ if (unlikely(!prog->code_size))
+ return false;
+
+ if (likely(prog->code_size)) {
+ if (nv50_program_upload_code(nv50, prog)) {
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+nv50_compute_validate_globals(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource *res = *util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ if (res)
+ nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
+ nv04_resource(res), NOUVEAU_BO_RDWR);
+ }
+}
+
+static bool
+nv50_compute_state_validate(struct nv50_context *nv50)
+{
+ if (!nv50_compute_validate_program(nv50))
+ return false;
+
+ if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
+ nv50_compute_validate_globals(nv50);
+
+ /* TODO: validate textures, samplers, surfaces */
+
+ nv50_bufctx_fence(nv50->bufctx_cp, false);
+
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
+ return false;
+ if (unlikely(nv50->state.flushed))
+ nv50_bufctx_fence(nv50->bufctx_cp, true);
+
+ return true;
+}
+
+static void
+nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+ unsigned size = align(nv50->compprog->parm_size, 0x4);
+
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ PUSH_DATA (push, (size / 4) << 8);
+
+ if (size) {
+ struct nouveau_mm_allocation *mm;
+ struct nouveau_bo *bo = NULL;
+ unsigned offset;
+
+ mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
+ assert(mm);
+
+ nouveau_bo_map(bo, 0, screen->base.client);
+ memcpy(bo->map + offset, input, size);
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+ nouveau_pushbuf_data(push, bo, offset, size);
+
+ nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
+ nouveau_bo_ref(NULL, &bo);
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+ }
+}
+
+static uint32_t
+nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
+{
+ struct nv50_program *prog = nv50->compprog;
+ const struct nv50_ir_prog_symbol *syms =
+ (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+ unsigned i;
+
+ for (i = 0; i < prog->cp.num_syms; ++i) {
+ if (syms[i].label == label)
+ return prog->code_base + syms[i].offset;
+ }
+ return prog->code_base; /* no symbols or symbol not found */
+}
+
+void
+nv50_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label, const void *input)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
+ struct nv50_program *cp = nv50->compprog;
+ bool ret;
+
+ ret = !nv50_compute_state_validate(nv50);
+ if (ret) {
+ NOUVEAU_ERR("Failed to launch grid !\n");
+ return;
+ }
+
+ nv50_compute_upload_input(nv50, input);
+
+ BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+ PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+
+ BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+ PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
+ BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+ PUSH_DATA (push, cp->max_gpr);
+
+ /* grid/block setup */
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+ PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
+ PUSH_DATA (push, block_layout[2]);
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+ PUSH_DATA (push, 1 << 16 | block_size);
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+ PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+ BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+ PUSH_DATA (push, 1);
+
+ /* kernel launching */
+ BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+
+ /* binding a compute shader clobbers fragment shader state */
+ nv50->dirty |= NV50_NEW_FRAGPROG;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
new file mode 100644
index 00000000000..268d11253b6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
@@ -0,0 +1,444 @@
+#ifndef NV50_COMPUTE_XML
+#define NV50_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36)
+- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36)
+- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33)
+- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09)
+- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59)
+
+Copyright (C) 2006-2015 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_COMPUTE_DMA_NOTIFY 0x00000180
+
+#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0
+
+#define NV50_COMPUTE_DMA_QUERY 0x000001a4
+
+#define NV50_COMPUTE_DMA_LOCAL 0x000001b8
+
+#define NV50_COMPUTE_DMA_STACK 0x000001bc
+
+#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0
+
+#define NV50_COMPUTE_DMA_TSC 0x000001c4
+
+#define NV50_COMPUTE_DMA_TIC 0x000001c8
+
+#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc
+
+#define NV50_COMPUTE_UNK0200 0x00000200
+#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff
+#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000
+#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16
+
+#define NV50_COMPUTE_UNK0204 0x00000204
+
+#define NV50_COMPUTE_UNK0208 0x00000208
+
+#define NV50_COMPUTE_UNK020C 0x0000020c
+
+#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210
+
+#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214
+
+#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218
+
+#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c
+
+#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220
+
+#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224
+
+#define NV50_COMPUTE_UNK0228 0x00000228
+#define NV50_COMPUTE_UNK0228_UNK0 0x00000001
+#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0
+#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4
+#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000
+#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12
+
+#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c
+
+#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230
+#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_COMPUTE_TSC_LIMIT 0x00000234
+#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_COMPUTE_CB_ADDR 0x00000238
+#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8
+#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0))
+#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NV50_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NV50_COMPUTE_TSC_FLUSH 0x0000027c
+#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_TIC_FLUSH 0x00000280
+#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_DELAY1 0x00000284
+
+#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288
+
+#define NV50_COMPUTE_DELAY2 0x0000028c
+
+#define NV50_COMPUTE_UNK0290 0x00000290
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100
+
+#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c
+
+#define NV50_COMPUTE_UNK02A0 0x000002a0
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8
+
+#define NV50_COMPUTE_CB_DEF_SET 0x000002ac
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_COMPUTE_UNK02B0 0x000002b0
+
+#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16
+
+#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8
+
+#define NV50_COMPUTE_UNK02BC 0x000002bc
+#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007
+#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070
+#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4
+
+#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0
+
+#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4
+
+#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8
+
+#define NV50_COMPUTE_TIC_LIMIT 0x000002cc
+
+#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004
+
+#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24
+
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008
+
+#define NV50_COMPUTE_UNK02F4 0x000002f4
+
+#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8
+
+#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc
+
+#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300
+
+#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304
+
+#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308
+
+#define NV50_COMPUTE_UNK030C 0x0000030c
+
+#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310
+
+#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314
+
+#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318
+
+#define NV50_COMPUTE_QUERY_GET 0x0000031c
+#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200
+#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000
+
+#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320
+
+#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324
+
+#define NV50_COMPUTE_COND_MODE 0x00000328
+#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_COMPUTE_UNK032C 0x0000032c
+
+#define NV50_COMPUTE_UNK0330 0x00000330
+
+#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0334__LEN 0x00000003
+
+#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0340__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0348__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0350__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0358 0x00000358
+
+#define NV50_COMPUTE_UNK035C 0x0000035c
+
+#define NV50_COMPUTE_UNK0360 0x00000360
+#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0
+#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4
+#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00
+#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8
+
+#define NV50_COMPUTE_UNK0364 0x00000364
+
+#define NV50_COMPUTE_LAUNCH 0x00000368
+
+#define NV50_COMPUTE_UNK036C 0x0000036c
+
+#define NV50_COMPUTE_UNK0370 0x00000370
+
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040
+
+#define NV50_COMPUTE_LINKED_TSC 0x00000378
+
+#define NV50_COMPUTE_UNK037C 0x0000037c
+#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001
+#define NV50_COMPUTE_UNK037C_UNK16 0x00010000
+
+#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380
+
+#define NV50_COMPUTE_UNK0384 0x00000384
+
+#define NV50_COMPUTE_GRIDID 0x00000388
+
+#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0))
+#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK038C__LEN 0x00000003
+
+#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398
+
+#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0))
+#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK039C__LEN 0x00000002
+
+#define NV50_COMPUTE_GRIDDIM 0x000003a4
+#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff
+#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0
+#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000
+#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16
+
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000
+#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040
+
+#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16
+
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001
+#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040
+
+#define NV50_COMPUTE_CP_START_ID 0x000003b4
+
+#define NV50_COMPUTE_REG_MODE 0x000003b8
+#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001
+#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002
+
+#define NV50_COMPUTE_TEX_LIMITS 0x000003bc
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_COMPUTE_BIND_TSC 0x000003c0
+#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_COMPUTE_BIND_TIC 0x000003c4
+#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff
+
+#define NV50_COMPUTE_UNK03CC 0x000003cc
+
+#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_COMPUTE_UNK03D4 0x000003d4
+
+#define NV50_COMPUTE_UNK03D8 0x000003d8
+
+#define NV50_COMPUTE_UNK03DC 0x000003dc
+
+#define NV50_COMPUTE_UNK03E0 0x000003e0
+
+#define NV50_COMPUTE_UNK03E4 0x000003e4
+
+#define NVA3_COMPUTE_TEX_MISC 0x000003e8
+#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001
+#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020
+#define NV50_COMPUTE_GLOBAL__LEN 0x00000010
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000
+#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100
+
+#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8
+
+#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0))
+#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004
+#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040
+
+#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0700__LEN 0x00000010
+
+
+#endif /* NV50_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 7867c2df7f3..4874b77b1e1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
nouveau_bufctx_del(&nv50->bufctx_3d);
nouveau_bufctx_del(&nv50->bufctx);
+ nouveau_bufctx_del(&nv50->bufctx_cp);
util_unreference_framebuffer_state(&nv50->framebuffer);
@@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
if (!nv50->constbuf[s][i].user)
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
}
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nv50->global_residents);
}
static void
@@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
@@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
@@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_SAMPLER_VIEW)) {
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
@@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
nv50->base.pushbuf = screen->base.pushbuf;
nv50->base.client = screen->base.client;
- ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
- &nv50->bufctx_3d);
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
+ &nv50->bufctx_3d);
if (!ret)
- ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
+ &nv50->bufctx_cp);
if (ret)
goto out_err;
@@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->draw_vbo = nv50_draw_vbo;
pipe->clear = nv50_clear;
+ pipe->launch_grid = nv50_launch_grid;
pipe->flush = nv50_flush;
pipe->texture_barrier = nv50_texture_barrier;
@@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ if (screen->compute) {
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
+ }
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nv50->base.scratch.bo_size = 2 << 20;
+ util_dynarray_init(&nv50->global_residents);
+
return pipe;
out_err:
if (nv50->bufctx_3d)
nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx_cp)
+ nouveau_bufctx_del(&nv50->bufctx_cp);
if (nv50->bufctx)
nouveau_bufctx_del(&nv50->bufctx);
FREE(nv50->blit);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fb74a9748a3..2cebcd99423 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,10 @@
#define NV50_NEW_MIN_SAMPLES (1 << 22)
#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_CP_PROGRAM (1 << 0)
+#define NV50_NEW_CP_GLOBALS (1 << 1)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
#define NV50_BIND_FB 0
#define NV50_BIND_VERTEX 1
#define NV50_BIND_VERTEX_TMP 2
@@ -58,7 +62,15 @@
#define NV50_BIND_SO 53
#define NV50_BIND_SCREEN 54
#define NV50_BIND_TLS 55
-#define NV50_BIND_COUNT 56
+#define NV50_BIND_3D_COUNT 56
+
+/* compute bufctx (during launch_grid) */
+#define NV50_BIND_CP_GLOBAL 0
+#define NV50_BIND_CP_SCREEN 1
+#define NV50_BIND_CP_QUERY 2
+#define NV50_BIND_CP_COUNT 3
+
+/* bufctx for other operations */
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@@ -101,8 +113,10 @@ struct nv50_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
struct nv50_graph_state state;
@@ -115,6 +129,7 @@ struct nv50_context {
struct nv50_program *vertprog;
struct nv50_program *gmtyprog;
struct nv50_program *fragprog;
+ struct nv50_program *compprog;
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
@@ -163,6 +178,8 @@ struct nv50_context {
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
+
+ struct util_dynarray global_residents;
};
static inline struct nv50_context *
@@ -302,4 +319,9 @@ struct pipe_video_buffer *
nv98_video_buffer_create(struct pipe_context *pipe,
const struct pipe_video_buffer *template);
+/* nv50_compute.c */
+void
+nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
+ uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 89e7a338283..a4b8ddfda95 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- prog->vp.vertexid = 1;
continue;
default:
break;
@@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
+ case PIPE_SHADER_COMPUTE:
+ return 0;
default:
return -1;
}
@@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->gp.has_layer = 0;
prog->gp.has_viewport = 0;
+ if (prog->type == PIPE_SHADER_COMPUTE)
+ info->prop.cp.inputOffset = 0x10;
+
info->driverPriv = prog;
#ifdef DEBUG
@@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
if (prog->type == PIPE_SHADER_FRAGMENT) {
if (info->prop.fp.writesDepth) {
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
@@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
+ } else
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
}
if (prog->pipe.stream_output.num_outputs)
@@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
struct nouveau_heap *heap;
int ret;
uint32_t size = align(prog->code_size, 0x40);
+ uint8_t prog_type;
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
return false;
}
}
- prog->code_base = prog->mem->start;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ /* CP code must be uploaded in FP code segment. */
+ prog_type = 1;
+ } else {
+ prog->code_base = prog->mem->start;
+ prog_type = prog->type;
+ }
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
@@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
- (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
@@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->code);
FREE(p->fixups);
-
+ FREE(p->interps);
FREE(p->so);
memset(p, 0, sizeof(*p));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 7a33eb11d6d..1de5122a56e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,9 +76,9 @@ struct nv50_program {
ubyte psiz; /* output slot of point size */
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
ubyte edgeflag;
- ubyte vertexid;
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
ubyte clpd_nr;
+ bool need_vertex_id;
} vp;
struct {
@@ -98,6 +98,13 @@ struct nv50_program {
ubyte viewportid; /* hw value of viewport index output */
} gp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+
void *fixups; /* relocation records */
void *interps; /* interpolation records */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index f31eaa0e314..cbef95d07f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -24,6 +24,10 @@ struct push_context {
struct translate *translate;
bool primitive_restart;
+
+ bool need_vertex_id;
+ int32_t index_bias;
+
uint32_t prim;
uint32_t restart_index;
uint32_t instance_id;
@@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
static void
emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
+ uint32_t elts = 0;
+
while (count) {
unsigned push = MIN2(count, ctx->packet_vertex_limit);
unsigned size = ctx->vertex_words * push;
+ if (unlikely(ctx->need_vertex_id)) {
+ /* For non-indexed draws, gl_VertexID goes up after each vertex. */
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, elts++);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
@@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.push = nv50->base.pushbuf;
ctx.translate = nv50->vertex->translate;
- ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+
+ ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS &&
+ nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32);
+ ctx.index_bias = info->index_bias;
+
+ /* For indexed draws, gl_VertexID must be emitted for every vertex. */
+ ctx.packet_vertex_limit =
+ ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
ctx.vertex_words = nv50->vertex->vertex_size;
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
@@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.instance_id++;
ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
+
+ if (unlikely(ctx.need_vertex_id)) {
+ /* Reset gl_VertexID to prevent future indexed draws to be confused. */
+ BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx.push, nv50->state.index_bias);
+ }
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index dd9b85b7208..4cd3b615606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
static struct pipe_query *
nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
@@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50)
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
pipe->render_condition = nv50_render_condition;
+ nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
+}
+
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+ unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ int num_hw_queries = 0;
+
+ num_hw_queries = nv50_hw_get_driver_query_info(screen, 0, NULL);
+
+ if (!info)
+ return num_hw_queries;
+
+ /* Init default values. */
+ info->name = "this_is_not_the_query_you_are_looking_for";
+ info->query_type = 0xdeadd01d;
+ info->max_value.u64 = 0;
+ info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+ info->group_id = -1;
+ info->flags = 0;
+
+ return nv50_hw_get_driver_query_info(screen, id, info);
+}
+
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+ unsigned id,
+ struct pipe_driver_query_group_info *info)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ int count = 0;
+
+ if (screen->compute)
+ if (screen->base.class_3d >= NV84_3D_CLASS)
+ count += 2;
+
+ if (!info)
+ return count;
+
+ if (id == NV50_HW_SM_QUERY_GROUP) {
+ if (screen->compute) {
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ info->name = "MP counters";
+
+ /* Because we can't expose the number of hardware counters needed
+ * for each different query, we don't want to allow more than one
+ * active query simultaneously to avoid failure when the maximum
+ * number of counters is reached. Note that these groups of GPU
+ * counters are currently only used by AMD_performance_monitor.
+ */
+ info->max_active_queries = 1;
+ info->num_queries = NV50_HW_SM_QUERY_COUNT;
+ return 1;
+ }
+ }
+ } else
+ if (id == NV50_HW_METRIC_QUERY_GROUP) {
+ if (screen->compute) {
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ info->name = "Performance metrics";
+ info->max_active_queries = 1;
+ info->num_queries = NV50_HW_METRIC_QUERY_COUNT;
+ return 1;
+ }
+ }
+ }
+
+ /* user asked for info about non-existing query group */
+ info->name = "this_is_not_the_query_group_you_are_looking_for";
+ info->max_active_queries = 0;
+ info->num_queries = 0;
+ return 0;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h
index d990285c857..bd4c0a386f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe)
return (struct nv50_query *)pipe;
}
+/*
+ * Driver queries groups:
+ */
+#define NV50_HW_SM_QUERY_GROUP 0
+#define NV50_HW_METRIC_QUERY_GROUP 1
+
void nv50_init_query_functions(struct nv50_context *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 945ce7abe50..b6ebbbf1010 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -25,6 +25,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
#include "nv_object.xml.h"
#define NV50_HW_QUERY_STATE_READY 0
@@ -41,7 +43,7 @@
#define NV50_HW_QUERY_ALLOC_SPACE 256
-static bool
+bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
int size)
{
@@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->begin_query)
+ return hq->funcs->begin_query(nv50, hq);
+
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render condition to false even *after* we re-
* initialized it to true.
@@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->end_query) {
+ hq->funcs->end_query(nv50, hq);
+ return;
+ }
+
hq->state = NV50_HW_QUERY_STATE_ENDED;
switch (q->type) {
@@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
uint64_t *data64 = (uint64_t *)hq->data;
int i;
+ if (hq->funcs && hq->funcs->get_query_result)
+ return hq->funcs->get_query_result(nv50, hq, wait, result);
+
if (hq->state != NV50_HW_QUERY_STATE_READY)
nv50_hw_query_update(q);
@@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
struct nv50_hw_query *hq;
struct nv50_query *q;
+ hq = nv50_hw_sm_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
+ hq = nv50_hw_metric_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
hq = CALLOC_STRUCT(nv50_hw_query);
if (!hq)
return NULL;
@@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
return q;
}
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
+
+ num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
+ num_hw_metric_queries =
+ nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
+
+ if (!info)
+ return num_hw_sm_queries + num_hw_metric_queries;
+
+ if (id < num_hw_sm_queries)
+ return nv50_hw_sm_get_driver_query_info(screen, id, info);
+
+ return nv50_hw_metric_get_driver_query_info(screen,
+ id - num_hw_sm_queries, info);
+}
+
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
struct nv50_query *q, unsigned result_offset)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index 294c67de9a4..82ec6bd2d96 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -8,8 +8,19 @@
#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+struct nv50_hw_query;
+
+struct nv50_hw_query_funcs {
+ void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *);
+ boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *);
+ void (*end_query)(struct nv50_context *, struct nv50_hw_query *);
+ boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *,
+ boolean, union pipe_query_result *);
+};
+
struct nv50_hw_query {
struct nv50_query base;
+ const struct nv50_hw_query_funcs *funcs;
uint32_t *data;
uint32_t sequence;
struct nouveau_bo *bo;
@@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q)
struct nv50_query *
nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+bool
+nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int);
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
struct nv50_query *, unsigned);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
new file mode 100644
index 00000000000..d1bccb94193
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NV84+ === */
+static const char *nv50_hw_metric_names[] =
+{
+ "metric-branch_efficiency",
+};
+
+struct nv50_hw_metric_query_cfg {
+ uint32_t queries[4];
+ uint32_t num_queries;
+};
+
+#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+static const struct nv50_hw_metric_query_cfg
+sm11_branch_efficiency =
+{
+ .queries[0] = _SM(BRANCH),
+ .queries[1] = _SM(DIVERGENT_BRANCH),
+ .num_queries = 2,
+};
+
+static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] =
+{
+ _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency),
+};
+
+#undef _SM
+#undef _M
+
+static const struct nv50_hw_metric_query_cfg *
+nv50_hw_metric_query_get_cfg(struct nv50_context *nv50,
+ struct nv50_hw_query *hq)
+{
+ struct nv50_query *q = &hq->base;
+ return sm11_hw_metric_queries[q->type - NV50_HW_METRIC_QUERY(0)];
+}
+
+static void
+nv50_hw_metric_destroy_query(struct nv50_context *nv50,
+ struct nv50_hw_query *hq)
+{
+ struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++)
+ hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
+ FREE(hmq);
+}
+
+static boolean
+nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+ struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+ boolean ret = false;
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++) {
+ ret = hmq->queries[i]->funcs->begin_query(nv50, hmq->queries[i]);
+ if (!ret)
+ return ret;
+ }
+ return ret;
+}
+
+static void
+nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+ struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++)
+ hmq->queries[i]->funcs->end_query(nv50, hmq->queries[i]);
+}
+
+static uint64_t
+sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[8])
+{
+ switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) {
+ case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+ /* (branch / (branch + divergent_branch)) * 100 */
+ if (res64[0] + res64[1])
+ return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+ break;
+ default:
+ debug_printf("invalid metric type: %d\n",
+ hq->base.type - NV50_HW_METRIC_QUERY(0));
+ break;
+ }
+ return 0;
+}
+
+static boolean
+nv50_hw_metric_get_query_result(struct nv50_context *nv50,
+ struct nv50_hw_query *hq, boolean wait,
+ union pipe_query_result *result)
+{
+ struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+ union pipe_query_result results[4] = {};
+ uint64_t res64[4] = {};
+ boolean ret = false;
+ unsigned i;
+
+ for (i = 0; i < hmq->num_queries; i++) {
+ ret = hmq->queries[i]->funcs->get_query_result(nv50, hmq->queries[i],
+ wait, &results[i]);
+ if (!ret)
+ return ret;
+ res64[i] = *(uint64_t *)&results[i];
+ }
+
+ *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64);
+ return ret;
+}
+
/* Vtable plugging metric queries into the generic HW query machinery. */
static const struct nv50_hw_query_funcs hw_metric_query_funcs = {
   .destroy_query = nv50_hw_metric_destroy_query,
   .begin_query = nv50_hw_metric_begin_query,
   .end_query = nv50_hw_metric_end_query,
   .get_query_result = nv50_hw_metric_get_query_result,
};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type)
+{
+ const struct nv50_hw_metric_query_cfg *cfg;
+ struct nv50_hw_metric_query *hmq;
+ struct nv50_hw_query *hq;
+ unsigned i;
+
+ if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST)
+ return NULL;
+
+ hmq = CALLOC_STRUCT(nv50_hw_metric_query);
+ if (!hmq)
+ return NULL;
+
+ hq = &hmq->base;
+ hq->funcs = &hw_metric_query_funcs;
+ hq->base.type = type;
+
+ cfg = nv50_hw_metric_query_get_cfg(nv50, hq);
+
+ for (i = 0; i < cfg->num_queries; i++) {
+ hmq->queries[i] = nv50_hw_sm_create_query(nv50, cfg->queries[i]);
+ if (!hmq->queries[i]) {
+ nv50_hw_metric_destroy_query(nv50, hq);
+ return NULL;
+ }
+ hmq->num_queries++;
+ }
+
+ return hq;
+}
+
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int count = 0;
+
+ if (screen->compute)
+ if (screen->base.class_3d >= NV84_3D_CLASS)
+ count += NV50_HW_METRIC_QUERY_COUNT;
+
+ if (!info)
+ return count;
+
+ if (id < count) {
+ if (screen->compute) {
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ info->name = nv50_hw_metric_names[id];
+ info->query_type = NV50_HW_METRIC_QUERY(id);
+ info->group_id = NV50_HW_METRIC_QUERY_GROUP;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
new file mode 100644
index 00000000000..f8cfc04084f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
@@ -0,0 +1,34 @@
+#ifndef __NV50_QUERY_HW_METRIC_H__
+#define __NV50_QUERY_HW_METRIC_H__
+
+#include "nv50_query_hw.h"
+
/* A metric query aggregates the results of several HW SM queries. */
struct nv50_hw_metric_query {
   struct nv50_hw_query base;
   struct nv50_hw_query *queries[4]; /* underlying SM counter queries */
   unsigned num_queries;             /* valid entries in queries[] */
};

/* Downcast helper (valid because base is the first member). */
static inline struct nv50_hw_metric_query *
nv50_hw_metric_query(struct nv50_hw_query *hq)
{
   return (struct nv50_hw_metric_query *)hq;
}

/*
 * Driver metrics queries:
 */
/* Metric types live in a driver-specific range offset by 1024 so they
 * cannot collide with the HW SM query types. */
#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1)
enum nv50_hw_metric_queries
{
   NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0,
   NV50_HW_METRIC_QUERY_COUNT
};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
new file mode 100644
index 00000000000..8453ce76095
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+#include "nv_object.xml.h"
+#include "nv50/nv50_compute.xml.h"
+
/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */

/* NOTE: intentionally using the same names as NV */
/* Indices match enum nv50_hw_sm_queries. */
static const char *nv50_hw_sm_query_names[] =
{
   "branch",
   "divergent_branch",
   "instructions",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "sm_cta_launched",
   "warp_serialize",
};

/* Pre-assembled compute kernel that copies $pm0..$pm3 plus the sequence
 * number (input word 1) into the query buffer; one 0x14-byte record per
 * MP, selected via $physid.  Launched by nv50_hw_sm_end_query(). */
static const uint64_t nv50_read_hw_sm_counters_code[] =
{
   /* and b32 $r0 $r0 0x0000ffff
    * add b32 $c0 $r0 $r0 $r0
    * (lg $c0) ret
    * mov $r0 $pm0
    * mov $r1 $pm1
    * mov $r2 $pm2
    * mov $r3 $pm3
    * mov $r4 $physid
    * ld $r5 b32 s[0x10]
    * ld $r6 b32 s[0x14]
    * and b32 $r4 $r4 0x000f0000
    * shr u32 $r4 $r4 0x10
    * mul $r4 u24 $r4 0x14
    * add b32 $r5 $r5 $r4
    * st b32 g15[$r5] $r0
    * add b32 $r5 $r5 0x04
    * st b32 g15[$r5] $r1
    * add b32 $r5 $r5 0x04
    * st b32 g15[$r5] $r2
    * add b32 $r5 $r5 0x04
    * st b32 g15[$r5] $r3
    * add b32 $r5 $r5 0x04
    * exit st b32 g15[$r5] $r6 */
   0x00000fffd03f0001ULL,
   0x040007c020000001ULL,
   0x0000028030000003ULL,
   0x6001078000000001ULL,
   0x6001478000000005ULL,
   0x6001878000000009ULL,
   0x6001c7800000000dULL,
   0x6000078000000011ULL,
   0x4400c78010000815ULL,
   0x4400c78010000a19ULL,
   0x0000f003d0000811ULL,
   0xe410078030100811ULL,
   0x0000000340540811ULL,
   0x0401078020000a15ULL,
   0xa0c00780d00f0a01ULL,
   0x0000000320048a15ULL,
   0xa0c00780d00f0a05ULL,
   0x0000000320048a15ULL,
   0xa0c00780d00f0a09ULL,
   0x0000000320048a15ULL,
   0xa0c00780d00f0a0dULL,
   0x0000000320048a15ULL,
   0xa0c00781d00f0a19ULL,
};

/* Hardware configuration of one MP_PM_CONTROL counter slot. */
struct nv50_hw_sm_counter_cfg
{
   uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */
   uint32_t unit : 8; /* UNK[0-5] */
   uint32_t sig : 8; /* signal selection */
};

/* Full configuration for one SM query (up to 4 counters). */
struct nv50_hw_sm_query_cfg
{
   struct nv50_hw_sm_counter_cfg ctr[4];
   uint8_t num_counters;
};

#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 }

/* ==== Compute capability 1.1 (G84+) ==== */
static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] =
{
   _Q(BRANCH,           LOGOP, UNK4, 0x02),
   _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09),
   _Q(INSTRUCTIONS,     LOGOP, UNK4, 0x04),
   _Q(PROF_TRIGGER_0,   LOGOP, UNK1, 0x26),
   _Q(PROF_TRIGGER_1,   LOGOP, UNK1, 0x27),
   _Q(PROF_TRIGGER_2,   LOGOP, UNK1, 0x28),
   _Q(PROF_TRIGGER_3,   LOGOP, UNK1, 0x29),
   _Q(PROF_TRIGGER_4,   LOGOP, UNK1, 0x2a),
   _Q(PROF_TRIGGER_5,   LOGOP, UNK1, 0x2b),
   _Q(PROF_TRIGGER_6,   LOGOP, UNK1, 0x2c),
   _Q(PROF_TRIGGER_7,   LOGOP, UNK1, 0x2d),
   _Q(SM_CTA_LAUNCHED,  LOGOP, UNK1, 0x33),
   _Q(WARP_SERIALIZE,   LOGOP, UNK0, 0x0b),
};
+
/* Return the LOGOP function mask that routes counter slot 'slot' into
 * the aggregated result (one bit-select pattern per slot); 0 for an
 * invalid slot. */
static inline uint16_t nv50_hw_sm_get_func(uint8_t slot)
{
   static const uint16_t func[] = { 0xaaaa, 0xcccc, 0xf0f0, 0xff00 };
   return slot < 4 ? func[slot] : 0;
}
+
+static const struct nv50_hw_sm_query_cfg *
+nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+ struct nv50_query *q = &hq->base;
+ return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)];
+}
+
+static void
+nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+ struct nv50_query *q = &hq->base;
+ q->funcs->destroy_query(nv50, q);
+}
+
+static boolean
+nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+ const struct nv50_hw_sm_query_cfg *cfg;
+ uint16_t func;
+ int i, c;
+
+ cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+ /* check if we have enough free counter slots */
+ if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) {
+ NOUVEAU_ERR("Not enough free MP counter slots !\n");
+ return false;
+ }
+
+ assert(cfg->num_counters <= 4);
+ PUSH_SPACE(push, 4 * 4);
+
+ /* set sequence field to 0 (used to check if result is available) */
+ for (i = 0; i < screen->MPsInTP; ++i) {
+ const unsigned b = (0x14 / 4) * i;
+ hq->data[b + 16] = 0;
+ }
+ hq->sequence++;
+
+ for (i = 0; i < cfg->num_counters; i++) {
+ screen->pm.num_hw_sm_active++;
+
+ /* find free counter slots */
+ for (c = 0; c < 4; ++c) {
+ if (!screen->pm.mp_counter[c]) {
+ hsq->ctr[i] = c;
+ screen->pm.mp_counter[c] = hsq;
+ break;
+ }
+ }
+
+ /* select func to aggregate counters */
+ func = nv50_hw_sm_get_func(c);
+
+ /* configure and reset the counter(s) */
+ BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+ | cfg->ctr[i].unit | cfg->ctr[i].mode);
+ BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+ }
+ return true;
+}
+
/* Stop counting and trigger readback of the SM counters.
 *
 * Disables all active counter slots, releases the slots owned by this
 * query, launches the small readback compute kernel (one 32-thread block
 * per MP) to copy $pm0..$pm3 plus the sequence number into the query
 * buffer, and finally re-programs the slots still owned by other active
 * queries.
 */
static void
nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
{
   struct nv50_screen *screen = nv50->screen;
   struct pipe_context *pipe = &nv50->base.pipe;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, 1, 1 };
   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
   int c;

   /* lazily build the readback program on first use; its code points at
    * the static nv50_read_hw_sm_counters_code[] array */
   if (unlikely(!screen->pm.prog)) {
      struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = true;
      prog->max_gpr = 7;
      prog->parm_size = 8;
      prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
      prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c]) {
         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
         PUSH_DATA (push, 0);
      }
   }

   /* release counters for this query */
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c] == hsq) {
         screen->pm.num_hw_sm_active--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                hq->bo);

   PUSH_SPACE(push, 2);
   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* kernel inputs: destination address (low 32 bits of the query BO)
    * and the sequence number the readers will wait for */
   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = hq->bo->offset + hq->base_offset;
   input[1] = hq->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);

   /* re-activate the counters of the other still-active queries */
   PUSH_SPACE(push, 8);
   mask = 0;
   for (c = 0; c < 4; c++) {
      const struct nv50_hw_sm_query_cfg *cfg;
      unsigned i;

      hsq = screen->pm.mp_counter[c];
      if (!hsq)
         continue;

      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
      for (i = 0; i < cfg->num_counters; i++) {
         uint16_t func;

         /* each slot only needs to be re-programmed once */
         if (mask & (1 << hsq->ctr[i]))
            break;

         mask |= 1 << hsq->ctr[i];
         func = nv50_hw_sm_get_func(hsq->ctr[i]);

         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
         PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
                    | cfg->ctr[i].unit | cfg->ctr[i].mode);
      }
   }
}
+
/* Read back the raw counter values for every MP into count[][].
 *
 * Each MP owns a 5-word record in hq->data: words 0-3 are the counters,
 * word 4 is the sequence number written by the readback kernel.  The
 * sequence is polled to detect result availability; with wait set, the
 * BO is waited on once before giving up.  Returns false if the results
 * are not available (or the wait failed).
 */
static inline bool
nv50_hw_sm_query_read_data(uint32_t count[32][4],
                           struct nv50_context *nv50, bool wait,
                           struct nv50_hw_query *hq,
                           const struct nv50_hw_sm_query_cfg *cfg,
                           unsigned mp_count)
{
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   unsigned p, c;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x14 / 4) * p; /* 5 words per MP record */

      for (c = 0; c < cfg->num_counters; ++c) {
         /* word b+4 is the per-MP sequence written by the kernel */
         if (hq->data[b + 4] != hq->sequence) {
            if (!wait)
               return false;
            if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client))
               return false;
         }
         /* hsq->ctr[c] selects which of the 4 counter words to read */
         count[p][c] = hq->data[b + hsq->ctr[c]];
      }
   }
   return true;
}
+
+static boolean
+nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
+ boolean wait, union pipe_query_result *result)
+{
+ uint32_t count[32][4];
+ uint64_t value = 0;
+ unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32);
+ unsigned p, c;
+ const struct nv50_hw_sm_query_cfg *cfg;
+ bool ret;
+
+ cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+ ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
+ if (!ret)
+ return false;
+
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][c];
+
+ /* We only count a single TP, and simply multiply by the total number of
+ * TPs to compute result over all TPs. This is inaccurate, but enough! */
+ value *= nv50->screen->TPs;
+
+ *(uint64_t *)result = value;
+ return true;
+}
+
/* Vtable plugging SM counter queries into the generic HW query machinery. */
static const struct nv50_hw_query_funcs hw_sm_query_funcs = {
   .destroy_query = nv50_hw_sm_destroy_query,
   .begin_query = nv50_hw_sm_begin_query,
   .end_query = nv50_hw_sm_end_query,
   .get_query_result = nv50_hw_sm_get_query_result,
};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type)
+{
+ struct nv50_hw_sm_query *hsq;
+ struct nv50_hw_query *hq;
+ unsigned space;
+
+ if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST)
+ return NULL;
+
+ hsq = CALLOC_STRUCT(nv50_hw_sm_query);
+ if (!hsq)
+ return NULL;
+
+ hq = &hsq->base;
+ hq->funcs = &hw_sm_query_funcs;
+ hq->base.type = type;
+
+ /*
+ * for each MP:
+ * [00] = MP.C0
+ * [04] = MP.C1
+ * [08] = MP.C2
+ * [0c] = MP.C3
+ * [10] = MP.sequence
+ */
+ space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t);
+
+ if (!nv50_hw_query_allocate(nv50, &hq->base, space)) {
+ FREE(hq);
+ return NULL;
+ }
+
+ return hq;
+}
+
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ int count = 0;
+
+ if (screen->compute)
+ if (screen->base.class_3d >= NV84_3D_CLASS)
+ count += NV50_HW_SM_QUERY_COUNT;
+
+ if (!info)
+ return count;
+
+ if (id < count) {
+ if (screen->compute) {
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ info->name = nv50_hw_sm_query_names[id];
+ info->query_type = NV50_HW_SM_QUERY(id);
+ info->group_id = NV50_HW_SM_QUERY_GROUP;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
new file mode 100644
index 00000000000..c1a1cd175e3
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_QUERY_HW_SM_H__
+#define __NV50_QUERY_HW_SM_H__
+
+#include "nv50_query_hw.h"
+
/* An SM (multiprocessor) performance counter query; ctr[] records which
 * of the 4 global MP counter slots each configured counter occupies. */
struct nv50_hw_sm_query {
   struct nv50_hw_query base;
   uint8_t ctr[4]; /* counter slot index per configured counter */
};

/* Downcast helper (valid because base is the first member). */
static inline struct nv50_hw_sm_query *
nv50_hw_sm_query(struct nv50_hw_query *hq)
{
   return (struct nv50_hw_sm_query *)hq;
}

/*
 * Performance counter queries:
 */
#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1)
enum nv50_hw_sm_queries
{
   NV50_HW_SM_QUERY_BRANCH = 0,
   NV50_HW_SM_QUERY_DIVERGENT_BRANCH,
   NV50_HW_SM_QUERY_INSTRUCTIONS,
   NV50_HW_SM_QUERY_PROF_TRIGGER_0,
   NV50_HW_SM_QUERY_PROF_TRIGGER_1,
   NV50_HW_SM_QUERY_PROF_TRIGGER_2,
   NV50_HW_SM_QUERY_PROF_TRIGGER_3,
   NV50_HW_SM_QUERY_PROF_TRIGGER_4,
   NV50_HW_SM_QUERY_PROF_TRIGGER_5,
   NV50_HW_SM_QUERY_PROF_TRIGGER_6,
   NV50_HW_SM_QUERY_PROF_TRIGGER_7,
   NV50_HW_SM_QUERY_SM_CTA_LAUNCHED,
   NV50_HW_SM_QUERY_WARP_SERIALIZE,
   NV50_HW_SM_QUERY_COUNT,
};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..1e4b75f18e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
break;
default:
return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
}
/* Report a compute capability.
 * Copies the value into *data (when non-NULL) and returns its size in
 * bytes; returns 0 for unknown caps. */
static int
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
                              enum pipe_compute_cap param, void *data)
{
   struct nv50_screen *screen = nv50_screen(pscreen);

/* copy the cap value out (if requested) and return its size */
#define RET(x) do { \
   if (data) \
      memcpy(data, x, sizeof(x)); \
   return sizeof(x); \
} while (0)

   switch (param) {
   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t []) { 2 });
   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t []) { 65535, 65535 }));
   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      RET(((uint64_t []) { 512, 512, 64 }));
   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      RET((uint64_t []) { 512 });
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
      RET((uint64_t []) { 1ULL << 32 });
   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
      RET((uint64_t []) { 4096 });
   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
      RET((uint32_t []) { 32 });
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
      RET((uint64_t []) { 1ULL << 40 });
   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t []) { 0 });
   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
      RET((uint32_t []) { screen->mp_count });
   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
   default:
      return 0;
   }

#undef RET
}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
@@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
@@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
+ pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+ pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
nv50_screen_init_resource_functions(pscreen);
@@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev)
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
@@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index ce51f0fc254..2a4983d1020 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -23,6 +23,10 @@ struct nv50_context;
#define NV50_MAX_VIEWPORTS 16
+#define NV50_MAX_GLOBALS 16
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
struct nv50_blitter;
struct nv50_graph_state {
@@ -66,6 +70,7 @@ struct nv50_screen {
unsigned MPsInTP;
unsigned max_tls_space;
unsigned cur_tls_space;
+ unsigned mp_count;
struct nouveau_heap *vp_code_heap;
struct nouveau_heap *gp_code_heap;
@@ -90,9 +95,16 @@ struct nv50_screen {
struct nouveau_bo *bo;
} fence;
+ struct {
+ struct nv50_program *prog; /* compute state object to read MP counters */
+ struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */
+ uint8_t num_hw_sm_active;
+ } pm;
+
struct nouveau_object *sync;
struct nouveau_object *tesla;
+ struct nouveau_object *compute;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
};
@@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
}
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *);
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_group_info *);
+
bool nv50_blitter_create(struct nv50_screen *);
void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *);
+
static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d27f12ca94b..b4ea08d4d13 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_GMTYPROG;
}
+static void *
+nv50_cp_state_create(struct pipe_context *pipe,
+ const struct pipe_compute_state *cso)
+{
+ struct nv50_program *prog;
+
+ prog = CALLOC_STRUCT(nv50_program);
+ if (!prog)
+ return NULL;
+ prog->type = PIPE_SHADER_COMPUTE;
+
+ prog->cp.smem_size = cso->req_local_mem;
+ prog->cp.lmem_size = cso->req_private_mem;
+ prog->parm_size = cso->req_input_mem;
+
+ prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+ return (void *)prog;
+}
+
/* Bind a compute state object created by nv50_cp_state_create. */
static void
nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso)
{
   struct nv50_context *nv50 = nv50_context(pipe);

   nv50->compprog = hwcso;
   nv50->dirty_cp |= NV50_NEW_CP_PROGRAM;
}
+
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
nv50->dirty |= NV50_NEW_STRMOUT;
}
/* Set compute resources (surfaces); currently an unimplemented stub. */
static void
nv50_set_compute_resources(struct pipe_context *pipe,
                           unsigned start, unsigned nr,
                           struct pipe_surface **resources)
{
   /* TODO: bind surfaces */
}
+
+static inline void
+nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+ struct nv04_resource *buf = nv04_resource(res);
+ if (buf) {
+ uint64_t limit = (buf->address + buf->base.width0) - 1;
+ if (limit < (1ULL << 32)) {
+ *phandle = (uint32_t)buf->address;
+ } else {
+ NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+ "resource not contained within 32-bit address space !\n");
+ *phandle = 0;
+ }
+ } else {
+ *phandle = 0;
+ }
+}
+
+static void
+nv50_set_global_bindings(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_resource **ptr;
+ unsigned i;
+ const unsigned end = start + nr;
+
+ if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+ const unsigned old_size = nv50->global_residents.size;
+ const unsigned req_size = end * sizeof(struct pipe_resource *);
+ util_dynarray_resize(&nv50->global_residents, req_size);
+ memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+ req_size - old_size);
+ }
+
+ if (resources) {
+ ptr = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i) {
+ pipe_resource_reference(&ptr[i], resources[i]);
+ nv50_set_global_handle(handles[i], resources[i]);
+ }
+ } else {
+ ptr = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i)
+ pipe_resource_reference(&ptr[i], NULL);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
+
+ nv50->dirty_cp = NV50_NEW_CP_GLOBALS;
+}
+
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->create_vs_state = nv50_vp_state_create;
pipe->create_fs_state = nv50_fp_state_create;
pipe->create_gs_state = nv50_gp_state_create;
+ pipe->create_compute_state = nv50_cp_state_create;
pipe->bind_vs_state = nv50_vp_state_bind;
pipe->bind_fs_state = nv50_fp_state_bind;
pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->bind_compute_state = nv50_cp_state_bind;
pipe->delete_vs_state = nv50_sp_state_delete;
pipe->delete_fs_state = nv50_sp_state_delete;
pipe->delete_gs_state = nv50_sp_state_delete;
+ pipe->delete_compute_state = nv50_sp_state_delete;
pipe->set_blend_color = nv50_set_blend_color;
pipe->set_stencil_ref = nv50_set_stencil_ref;
@@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->stream_output_target_destroy = nv50_so_target_destroy;
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+ pipe->set_global_binding = nv50_set_global_bindings;
+ pipe->set_compute_resources = nv50_set_compute_resources;
+
nv50->sample_mask = ~0;
nv50->min_samples = 1;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b6181edf24f..02a759c23ad 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,8 +503,7 @@ static struct state_validate {
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
- NV50_NEW_VERTPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 916a7d44a31..8ba19d2cc90 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16));
PUSH_DATA (push, (height << 16));
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
@@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, 0x3c);
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9aa593f919e..85878d5fcc7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
uint64_t addrs[PIPE_MAX_ATTRIBS];
uint32_t limits[PIPE_MAX_ATTRIBS];
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- struct nv50_vertex_stateobj dummy = {};
- struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
struct pipe_vertex_buffer *vb;
struct nv50_vertex_element *ve;
uint32_t mask;
@@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
unsigned i;
const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
- /* A vertexid is not generated for inline data uploads. Have to use a
- * VBO. This check must come after the vertprog has been validated,
- * otherwise vertexid may be unset.
- */
- assert(nv50->vertprog->translated);
- if (nv50->vertprog->vp.vertexid)
- nv50->vbo_push_hint = 0;
-
if (unlikely(vertex->need_conversion))
nv50->vbo_fifo = ~0;
else
@@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
nv50->state.index_bias = 0;
@@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, index_bias);
}
nv50->state.index_bias = index_bias;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 76f1b41ea70..68002305d72 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_3D(m) 3, (m)
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NV84_3D(n) SUBC_3D(NV84_3D_##n)
#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
#define SUBC_2D(m) 4, (m)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 82ed5a1864e..162661ff2a7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER |
+ PIPE_BIND_SAMPLER_VIEW)) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f53921092a5..d992b10a23c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+ info->flags = 0;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
@@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
/* Because we can't expose the number of hardware counters needed for
* each different query, we don't want to allow more than one active
@@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (screen->compute) {
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = "Performance metrics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
info->max_active_queries = 1;
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
return 1;
@@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
@@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
info->name = "this_is_not_the_query_group_you_are_looking_for";
info->max_active_queries = 0;
info->num_queries = 0;
- info->type = 0;
return 0;
}
@@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->render_condition = nvc0_render_condition;
+ nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 44b222e5134..7962143d45a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->type = PIPE_SHADER_COMPUTE;
prog->translated = true;
- prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
+ prog->num_gprs = 14;
} else {
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
+ prog->num_gprs = 12;
}
screen->pm.prog = prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index cdb1fc1145f..6a4ae5be2ab 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
nvc0_resource_fence(res, NOUVEAU_BO_WR);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
@@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
if (width * height != elements) {
@@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, dst->u.tex.first_layer);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index f63790c329e..1dbad2f39e3 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,12 +7,14 @@ C_SOURCES := \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
+ r600_query.h \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce_50.c \
+ radeon_vce_52.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3599692a857..7464f677398 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "tgsi/tgsi_parse.h"
+#include "util/list.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
@@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
- ctx->queries_suspended_for_flush = false;
- if (ctx->num_cs_dw_nontimer_queries_suspend) {
+ if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
r600_suspend_nontimer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_suspend_timer_queries(ctx);
- ctx->queries_suspended_for_flush = true;
- }
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (ctx->queries_suspended_for_flush) {
- r600_resume_nontimer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_resume_timer_queries(ctx);
- }
+ if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
+ r600_resume_nontimer_queries(ctx);
}
static void r600_flush_from_st(struct pipe_context *ctx,
@@ -718,50 +717,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen)
rscreen->info.r600_clock_crystal_freq;
}
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- struct pipe_driver_query_info list[] = {
- {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
- {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
- {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
- {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
- {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
- {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
- };
- unsigned num_queries;
-
- if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
- num_queries = Elements(list);
- else if (rscreen->info.drm_major == 3)
- num_queries = Elements(list) - 3;
- else
- num_queries = Elements(list) - 4;
-
- if (!info)
- return num_queries;
-
- if (index >= num_queries)
- return 0;
-
- *info = list[index];
- return 1;
-}
-
static void r600_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
@@ -949,7 +904,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
- rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
@@ -965,6 +919,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
}
r600_init_screen_texture_functions(rscreen);
+ r600_init_screen_query_functions(rscreen);
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index ebe633b9125..fbdc5c410ae 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
-#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
@@ -408,8 +393,6 @@ struct r600_common_context {
struct list_head active_timer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
unsigned num_cs_dw_timer_queries_suspend;
- /* If queries have been suspended. */
- bool queries_suspended_for_flush;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -526,6 +509,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
/* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 8c2b601a96c..b1cfb6e462b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,81 +22,218 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "r600_query.h"
#include "r600_cs.h"
#include "util/u_memory.h"
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+ struct r600_query b;
-struct r600_query_buffer {
- /* The buffer where query results are stored. */
- struct r600_resource *buf;
- /* Offset of the next free result after current query data */
- unsigned results_end;
- /* If a query buffer is full, a new buffer is created and the old one
- * is put in here. When we calculate the result, we sum up the samples
- * from all buffers. */
- struct r600_query_buffer *previous;
-};
-
-struct r600_query {
- /* The query buffer and how many results are in it. */
- struct r600_query_buffer buffer;
- /* The type of query */
- unsigned type;
- /* Size of the result in memory for both begin_query and end_query,
- * this can be one or two numbers, or it could even be a size of a structure. */
- unsigned result_size;
- /* The number of dwords for begin_query or end_query. */
- unsigned num_cs_dw;
- /* linked list of queries */
- struct list_head list;
- /* for custom non-GPU queries */
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
- /* For transform feedback: which stream the query is for */
- unsigned stream;
};
-
-static bool r600_is_timer_query(unsigned type)
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
- return type == PIPE_QUERY_TIME_ELAPSED ||
- type == PIPE_QUERY_TIMESTAMP;
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ screen->fence_reference(screen, &query->fence, NULL);
+ FREE(query);
}
-static bool r600_query_needs_begin(unsigned type)
+static enum radeon_value_id winsys_id_from_type(unsigned type)
{
- return type != PIPE_QUERY_GPU_FINISHED &&
- type != PIPE_QUERY_TIMESTAMP;
+ switch (type) {
+ case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+ case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+ case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+ case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+ case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+ case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+ case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+ case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+ case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ default: unreachable("query type does not correspond to winsys id");
+ }
}
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
- unsigned j, i, num_results, buf_size = 4096;
- uint32_t *results;
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
- /* Non-GPU queries. */
- switch (type) {
+ switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
+ break;
case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ query->begin_result = 0;
+ break;
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
+ case R600_QUERY_NUM_BYTES_MOVED: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GPU_LOAD:
+ query->begin_result = r600_gpu_load_begin(rctx->screen);
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ default:
+ unreachable("r600_query_sw_begin: bad query type");
+ }
+
+ return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ switch(query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ rctx->b.flush(&rctx->b, &query->fence, 0);
+ break;
+ case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_NUM_CS_FLUSHES:
+ case R600_QUERY_NUM_BYTES_MOVED: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
case R600_QUERY_GPU_LOAD:
+ query->end_result = r600_gpu_load_end(rctx->screen,
+ query->begin_result);
+ query->begin_result = 0;
+ break;
case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ default:
+ unreachable("r600_query_sw_end: bad query type");
+ }
+}
+
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait,
+ union pipe_query_result *result)
+{
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ switch (query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* Convert from cycles per millisecond to cycles per second (Hz). */
+ result->timestamp_disjoint.frequency =
+ (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+ result->timestamp_disjoint.disjoint = FALSE;
+ return TRUE;
+ case PIPE_QUERY_GPU_FINISHED: {
+ struct pipe_screen *screen = rctx->b.screen;
+ result->b = screen->fence_finish(screen, query->fence,
+ wait ? PIPE_TIMEOUT_INFINITE : 0);
+ return result->b;
+ }
+ }
+
+ result->u64 = query->end_result - query->begin_result;
+
+ switch (query->b.type) {
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_GPU_TEMPERATURE:
+ result->u64 /= 1000;
+ break;
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ result->u64 *= 1000000;
+ break;
+ }
+
+ return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = {
+ .destroy = r600_query_sw_destroy,
+ .begin = r600_query_sw_begin,
+ .end = r600_query_sw_end,
+ .get_result = r600_query_sw_get_result
+};
+
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+ unsigned query_type)
+{
+ struct r600_query_sw *query;
+
+ query = CALLOC_STRUCT(r600_query_sw);
+ if (query == NULL)
return NULL;
+
+ query->b.type = query_type;
+ query->b.ops = &sw_query_ops;
+
+ return (struct pipe_query *)query;
+}
+
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+ struct r600_query_buffer *prev = query->buffer.previous;
+
+ /* Release all query buffers. */
+ while (prev) {
+ struct r600_query_buffer *qbuf = prev;
+ prev = prev->previous;
+ pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
+ FREE(qbuf);
}
+ pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+ FREE(rquery);
+}
+
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
+{
+ unsigned buf_size = 4096;
+
/* Queries are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
@@ -105,14 +242,30 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, buf_size);
- switch (type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
- memset(results, 0, buf_size);
+ if (query->flags & R600_QUERY_HW_FLAG_PREDICATE)
+ query->ops->prepare_buffer(ctx, query, buf);
+
+ return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer)
+{
+ /* Callers ensure that the buffer is currently unused by the GPU. */
+ uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+
+ memset(results, 0, buffer->b.b.width0);
+
+ if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ unsigned num_results;
+ unsigned i, j;
/* Set top bits for unused backends. */
- num_results = buf_size / (16 * ctx->max_db);
+ num_results = buffer->b.b.width0 / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1<<i))) {
@@ -122,22 +275,109 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
}
results += 4 * ctx->max_db;
}
+ }
+}
+
+static struct r600_query_ops query_hw_ops = {
+ .destroy = r600_query_hw_destroy,
+ .begin = r600_query_hw_begin,
+ .end = r600_query_hw_end,
+ .get_result = r600_query_hw_get_result,
+};
+
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+ union pipe_query_result *);
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+ .prepare_buffer = r600_query_hw_prepare_buffer,
+ .emit_start = r600_query_hw_do_emit_start,
+ .emit_stop = r600_query_hw_do_emit_stop,
+ .clear_result = r600_query_hw_clear_result,
+ .add_result = r600_query_hw_add_result,
+};
+
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+ struct r600_query_hw *query)
+{
+ query->buffer.buf = r600_new_query_buffer(rctx, query);
+ if (!query->buffer.buf)
+ return FALSE;
+
+ return TRUE;
+}
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
+ unsigned query_type,
+ unsigned index)
+{
+ struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+ if (!query)
+ return NULL;
+
+ query->b.type = query_type;
+ query->b.ops = &query_hw_ops;
+ query->ops = &query_hw_default_hw_ops;
+
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ query->result_size = 16 * rctx->max_db;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
+ query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
break;
case PIPE_QUERY_TIME_ELAPSED:
+ query->result_size = 16;
+ query->num_cs_dw_begin = 8;
+ query->num_cs_dw_end = 8;
+ query->flags = R600_QUERY_HW_FLAG_TIMER;
+ break;
case PIPE_QUERY_TIMESTAMP:
+ query->result_size = 8;
+ query->num_cs_dw_end = 8;
+ query->flags = R600_QUERY_HW_FLAG_TIMER |
+ R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
+ query->stream = index;
+ query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
- memset(results, 0, buf_size);
+ /* 11 values on EG, 8 on R600. */
+ query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
break;
default:
assert(0);
+ FREE(query);
+ return NULL;
}
- return buf;
+
+ if (!r600_query_hw_init(rctx, query)) {
+ FREE(query);
+ return NULL;
+ }
+
+ return (struct pipe_query *)query;
}
static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
@@ -159,7 +399,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
}
}
-static unsigned event_type_for_stream(struct r600_query *query)
+static unsigned event_type_for_stream(struct r600_query_hw *query)
{
switch (query->stream) {
default:
@@ -170,28 +410,14 @@ static unsigned event_type_for_stream(struct r600_query *query)
}
}
-static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- uint64_t va;
-
- r600_update_occlusion_query_state(ctx, query->type, 1);
- r600_update_prims_generated_query_state(ctx, query->type, 1);
- ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
-
- /* Get a new query buffer if needed. */
- if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
- struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
- *qbuf = query->buffer;
- query->buffer.buf = r600_new_query_buffer(ctx, query->type);
- query->buffer.results_end = 0;
- query->buffer.previous = qbuf;
- }
-
- /* emit begin query */
- va = query->buffer.buf->gpu_address + query->buffer.results_end;
- switch (query->type) {
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -227,30 +453,50 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
-
- if (r600_is_timer_query(query->type))
- ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
- else
- ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
}
-static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
{
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
uint64_t va;
- /* The queries which need begin already called this in begin_query. */
- if (!r600_query_needs_begin(query->type)) {
- ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+ r600_update_occlusion_query_state(ctx, query->b.type, 1);
+ r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+
+ ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+ TRUE);
+
+ /* Get a new query buffer if needed. */
+ if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
+ struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+ *qbuf = query->buffer;
+ query->buffer.buf = r600_new_query_buffer(ctx, query);
+ query->buffer.results_end = 0;
+ query->buffer.previous = qbuf;
}
- va = query->buffer.buf->gpu_address;
+ /* emit begin query */
+ va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+ query->ops->emit_start(ctx, query, query->buffer.buf, va);
- /* emit end query */
- switch (query->type) {
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
+ else
+ ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
+}
+
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- va += query->buffer.results_end + 8;
+ va += 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
@@ -260,14 +506,14 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
@@ -278,7 +524,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
@@ -289,25 +535,41 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
+{
+ uint64_t va;
+
+ /* The queries which need begin already called this in begin_query. */
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+ ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
+ }
+
+ /* emit end query */
+ va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+ query->ops->emit_stop(ctx, query, query->buffer.buf, va);
query->buffer.results_end += query->result_size;
- if (r600_query_needs_begin(query->type)) {
- if (r600_is_timer_query(query->type))
- ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
else
- ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
+ ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
}
- r600_update_occlusion_query_state(ctx, query->type, -1);
- r600_update_prims_generated_query_state(ctx, query->type, -1);
+ r600_update_occlusion_query_state(ctx, query->b.type, -1);
+ r600_update_prims_generated_query_state(ctx, query->b.type, -1);
}
static void r600_emit_query_predication(struct r600_common_context *ctx,
struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- struct r600_query *query = (struct r600_query*)ctx->render_cond;
+ struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
struct r600_query_buffer *qbuf;
uint32_t op;
bool flag_wait;
@@ -318,7 +580,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
- switch (query->type) {
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
op = PRED_OP(PREDICATION_OP_ZPASS);
@@ -364,94 +626,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_query *query;
- bool skip_allocation = false;
- query = CALLOC_STRUCT(r600_query);
- if (query == NULL)
- return NULL;
-
- query->type = query_type;
-
- switch (query_type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- query->result_size = 16 * rctx->max_db;
- query->num_cs_dw = 6;
- break;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- query->result_size = 16;
- query->num_cs_dw = 8;
- break;
- case PIPE_QUERY_TIMESTAMP:
- query->result_size = 8;
- query->num_cs_dw = 8;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
- query->result_size = 32;
- query->num_cs_dw = 6;
- query->stream = index;
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- /* 11 values on EG, 8 on R600. */
- query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
- query->num_cs_dw = 6;
- break;
- /* Non-GPU queries and queries not requiring a buffer. */
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_GPU_LOAD:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- skip_allocation = true;
- break;
- default:
- assert(0);
- FREE(query);
- return NULL;
- }
+ if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+ query_type == PIPE_QUERY_GPU_FINISHED ||
+ query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return r600_query_sw_create(ctx, query_type);
- if (!skip_allocation) {
- query->buffer.buf = r600_new_query_buffer(rctx, query_type);
- if (!query->buffer.buf) {
- FREE(query);
- return NULL;
- }
- }
- return (struct pipe_query*)query;
+ return r600_query_hw_create(rctx, query_type, index);
}
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
- struct r600_query *rquery = (struct r600_query*)query;
- struct r600_query_buffer *prev = rquery->buffer.previous;
-
- /* Release all query buffers. */
- while (prev) {
- struct r600_query_buffer *qbuf = prev;
- prev = prev->previous;
- pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
- FREE(qbuf);
- }
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
- pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
- FREE(query);
+ rquery->ops->destroy(rctx, rquery);
}
static boolean r600_begin_query(struct pipe_context *ctx,
@@ -459,48 +648,14 @@ static boolean r600_begin_query(struct pipe_context *ctx,
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
- struct r600_query_buffer *prev = rquery->buffer.previous;
- if (!r600_query_needs_begin(rquery->type)) {
- assert(0);
- return false;
- }
+ return rquery->ops->begin(rctx, rquery);
+}
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return true;
- case R600_QUERY_DRAW_CALLS:
- rquery->begin_result = rctx->num_draw_calls;
- return true;
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->begin_result = 0;
- return true;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return true;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return true;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return true;
- case R600_QUERY_GPU_LOAD:
- rquery->begin_result = r600_gpu_load_begin(rctx->screen);
- return true;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
- return true;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return true;
- }
+static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query)
+{
+ struct r600_query_buffer *prev = query->buffer.previous;
/* Discard the old query buffers. */
while (prev) {
@@ -510,22 +665,39 @@ static boolean r600_begin_query(struct pipe_context *ctx,
FREE(qbuf);
}
- /* Obtain a new buffer if the current one can't be mapped without a stall. */
- if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
- pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
- rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
+ if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) {
+ /* Obtain a new buffer if the current one can't be mapped without a stall. */
+ if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+ !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+ pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+ query->buffer.buf = r600_new_query_buffer(rctx, query);
+ } else {
+ query->ops->prepare_buffer(rctx, query, query->buffer.buf);
+ }
}
- rquery->buffer.results_end = 0;
- rquery->buffer.previous = NULL;
+ query->buffer.results_end = 0;
+ query->buffer.previous = NULL;
+}
- r600_emit_query_begin(rctx, rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
- if (r600_is_timer_query(rquery->type))
- LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+ assert(0);
+ return false;
+ }
+
+ r600_query_hw_reset_buffers(rctx, query);
+
+ r600_query_hw_emit_start(rctx, query);
+
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
else
- LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
+ LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
return true;
}
@@ -534,64 +706,24 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return;
- case PIPE_QUERY_GPU_FINISHED:
- ctx->flush(ctx, &rquery->fence, 0);
- return;
- case R600_QUERY_DRAW_CALLS:
- rquery->end_result = rctx->num_draw_calls;
- return;
- case R600_QUERY_REQUESTED_VRAM:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
- return;
- case R600_QUERY_REQUESTED_GTT:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
- return;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return;
- case R600_QUERY_VRAM_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
- return;
- case R600_QUERY_GTT_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
- return;
- case R600_QUERY_GPU_TEMPERATURE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
- return;
- case R600_QUERY_CURRENT_GPU_SCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
- return;
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
- return;
- case R600_QUERY_GPU_LOAD:
- rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
- return;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
- return;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return;
- }
+ rquery->ops->end(rctx, rquery);
+}
- r600_emit_query_end(rctx, rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START)
+ r600_query_hw_reset_buffers(rctx, query);
- if (r600_query_needs_begin(rquery->type))
- LIST_DELINIT(&rquery->list);
+ r600_query_hw_emit_stop(rctx, query);
+
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+ LIST_DELINIT(&query->list);
}
-static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
bool test_status_bit)
{
uint32_t *current_result = (uint32_t*)map;
@@ -609,80 +741,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
return 0;
}
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
- struct r600_query *query,
- struct r600_query_buffer *qbuf,
- boolean wait,
- union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ void *buffer,
+ union pipe_query_result *result)
{
- struct pipe_screen *screen = ctx->b.screen;
- unsigned results_base = 0;
- char *map;
-
- /* Non-GPU queries. */
- switch (query->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* Convert from cycles per millisecond to cycles per second (Hz). */
- result->timestamp_disjoint.frequency =
- (uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
- result->timestamp_disjoint.disjoint = FALSE;
- return TRUE;
- case PIPE_QUERY_GPU_FINISHED:
- result->b = screen->fence_finish(screen, query->fence,
- wait ? PIPE_TIMEOUT_INFINITE : 0);
- return result->b;
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- result->u64 = query->end_result - query->begin_result;
- return TRUE;
- case R600_QUERY_GPU_LOAD:
- result->u64 = query->end_result;
- return TRUE;
- }
-
- map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
- PIPE_TRANSFER_READ |
- (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
- if (!map)
- return FALSE;
-
- /* count all results across all data blocks */
- switch (query->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- while (results_base != qbuf->results_end) {
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: {
+ unsigned results_base = 0;
+ while (results_base != query->result_size) {
result->u64 +=
- r600_query_read_result(map + results_base, 0, 2, true);
+ r600_query_read_result(buffer + results_base, 0, 2, true);
results_base += 16;
}
break;
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- while (results_base != qbuf->results_end) {
+ }
+ case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ unsigned results_base = 0;
+ while (results_base != query->result_size) {
result->b = result->b ||
- r600_query_read_result(map + results_base, 0, 2, true) != 0;
+ r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
results_base += 16;
}
break;
+ }
case PIPE_QUERY_TIME_ELAPSED:
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 0, 2, false);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 0, 2, false);
break;
case PIPE_QUERY_TIMESTAMP:
{
- uint32_t *current_result = (uint32_t*)map;
+ uint32_t *current_result = (uint32_t*)buffer;
result->u64 = (uint64_t)current_result[0] |
(uint64_t)current_result[1] << 32;
break;
@@ -694,84 +782,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
* u64 PrimitiveStorageNeeded;
* }
* We only need NumPrimitivesWritten here. */
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 2, 6, true);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 2, 6, true);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Here we read PrimitiveStorageNeeded. */
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_SO_STATISTICS:
- while (results_base != qbuf->results_end) {
- result->so_statistics.num_primitives_written +=
- r600_query_read_result(map + results_base, 2, 6, true);
- result->so_statistics.primitives_storage_needed +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->so_statistics.num_primitives_written +=
+ r600_query_read_result(buffer, 2, 6, true);
+ result->so_statistics.primitives_storage_needed +=
+ r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- while (results_base != qbuf->results_end) {
- result->b = result->b ||
- r600_query_read_result(map + results_base, 2, 6, true) !=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->b = result->b ||
+ r600_query_read_result(buffer, 2, 6, true) !=
+ r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
if (ctx->chip_class >= EVERGREEN) {
- while (results_base != qbuf->results_end) {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(map + results_base, 0, 22, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(map + results_base, 2, 24, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(map + results_base, 4, 26, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(map + results_base, 6, 28, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(map + results_base, 8, 30, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(map + results_base, 10, 32, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(map + results_base, 12, 34, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(map + results_base, 14, 36, false);
- result->pipeline_statistics.hs_invocations +=
- r600_query_read_result(map + results_base, 16, 38, false);
- result->pipeline_statistics.ds_invocations +=
- r600_query_read_result(map + results_base, 18, 40, false);
- result->pipeline_statistics.cs_invocations +=
- r600_query_read_result(map + results_base, 20, 42, false);
- results_base += query->result_size;
- }
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 22, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 24, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 26, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 28, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 30, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 32, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 34, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 36, false);
+ result->pipeline_statistics.hs_invocations +=
+ r600_query_read_result(buffer, 16, 38, false);
+ result->pipeline_statistics.ds_invocations +=
+ r600_query_read_result(buffer, 18, 40, false);
+ result->pipeline_statistics.cs_invocations +=
+ r600_query_read_result(buffer, 20, 42, false);
} else {
- while (results_base != qbuf->results_end) {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(map + results_base, 0, 16, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(map + results_base, 2, 18, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(map + results_base, 4, 20, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(map + results_base, 6, 22, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(map + results_base, 8, 24, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(map + results_base, 10, 26, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(map + results_base, 12, 28, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(map + results_base, 14, 30, false);
- results_base += query->result_size;
- }
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 16, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 18, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 20, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 22, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 24, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 26, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 28, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 30, false);
}
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
@@ -793,23 +861,47 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
default:
assert(0);
}
-
- return TRUE;
}
static boolean r600_get_query_result(struct pipe_context *ctx,
- struct pipe_query *query,
- boolean wait, union pipe_query_result *result)
+ struct pipe_query *query, boolean wait,
+ union pipe_query_result *result)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+
+ return rquery->ops->get_result(rctx, rquery, wait, result);
+}
+
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+ union pipe_query_result *result)
+{
+ util_query_clear_result(result, query->b.type);
+}
+
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait, union pipe_query_result *result)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
- util_query_clear_result(result, rquery->type);
+ query->ops->clear_result(query, result);
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
- if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+ unsigned results_base = 0;
+ void *map;
+
+ map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf,
+ PIPE_TRANSFER_READ |
+ (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+ if (!map)
return FALSE;
+
+ while (results_base != qbuf->results_end) {
+ query->ops->add_result(rctx, query, map + results_base,
+ result);
+ results_base += query->result_size;
}
}
@@ -827,7 +919,7 @@ static void r600_render_condition(struct pipe_context *ctx,
uint mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_query *rquery = (struct r600_query*)query;
+ struct r600_query_hw *rquery = (struct r600_query_hw *)query;
struct r600_query_buffer *qbuf;
struct r600_atom *atom = &rctx->render_cond_atom;
@@ -837,8 +929,10 @@ static void r600_render_condition(struct pipe_context *ctx,
/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
- atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+ if (query) {
+ for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+ atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+ }
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
@@ -847,10 +941,10 @@ static void r600_suspend_queries(struct r600_common_context *ctx,
struct list_head *query_list,
unsigned *num_cs_dw_queries_suspend)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
LIST_FOR_EACH_ENTRY(query, query_list, list) {
- r600_emit_query_end(ctx, query);
+ r600_query_hw_emit_stop(ctx, query);
}
assert(*num_cs_dw_queries_suspend == 0);
}
@@ -870,19 +964,19 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx)
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
struct list_head *query_list)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
unsigned num_dw = 0;
LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
- num_dw += query->num_cs_dw * 2;
+ num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
/* Workaround for the fact that
* num_cs_dw_nontimer_queries_suspend is incremented for every
* resumed query, which raises the bar in need_cs_space for
* queries about to be resumed.
*/
- num_dw += query->num_cs_dw;
+ num_dw += query->num_cs_dw_end;
}
/* primitives generated query */
num_dw += ctx->streamout.enable_atom.num_dw;
@@ -896,7 +990,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
struct list_head *query_list,
unsigned *num_cs_dw_queries_suspend)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
assert(*num_cs_dw_queries_suspend == 0);
@@ -905,7 +999,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
LIST_FOR_EACH_ENTRY(query, query_list, list) {
- r600_emit_query_begin(ctx, query);
+ r600_query_hw_emit_start(ctx, query);
}
}
@@ -1002,6 +1096,76 @@ err:
return;
}
+#define X(name_, query_type_, type_, result_type_) \
+ { \
+ .name = name_, \
+ .query_type = R600_QUERY_##query_type_, \
+ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+ .group_id = ~(unsigned)0 \
+ }
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+ X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+ X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE),
+ X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+ X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+ X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+ X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, CUMULATIVE),
+ X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+ X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+ X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
+ X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+ X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+ return Elements(r600_driver_query_list);
+ else if (rscreen->info.drm_major == 3)
+ return Elements(r600_driver_query_list) - 3;
+ else
+ return Elements(r600_driver_query_list) - 4;
+}
+
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned num_queries = r600_get_num_queries(rscreen);
+
+ if (!info)
+ return num_queries;
+
+ if (index >= num_queries)
+ return 0;
+
+ *info = r600_driver_query_list[index];
+
+ switch (info->query_type) {
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_VRAM_USAGE:
+ info->max_value.u64 = rscreen->info.vram_size;
+ break;
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_GTT_USAGE:
+ info->max_value.u64 = rscreen->info.gart_size;
+ break;
+ case R600_QUERY_GPU_TEMPERATURE:
+ info->max_value.u64 = 125;
+ break;
+ }
+
+ return 1;
+}
+
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
@@ -1017,3 +1181,8 @@ void r600_query_init(struct r600_common_context *rctx)
LIST_INITHEAD(&rctx->active_nontimer_queries);
LIST_INITHEAD(&rctx->active_timer_queries);
}
+
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+ rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+}
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 00000000000..0ea5707ca45
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle <[email protected]>
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+#include "util/list.h"
+
+struct r600_common_context;
+struct r600_query;
+struct r600_query_hw;
+struct r600_resource;
+
+#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100)
+
+struct r600_query_ops {
+ void (*destroy)(struct r600_common_context *, struct r600_query *);
+ boolean (*begin)(struct r600_common_context *, struct r600_query *);
+ void (*end)(struct r600_common_context *, struct r600_query *);
+ boolean (*get_result)(struct r600_common_context *,
+ struct r600_query *, boolean wait,
+ union pipe_query_result *result);
+};
+
+struct r600_query {
+ struct r600_query_ops *ops;
+
+ /* The type of query */
+ unsigned type;
+};
+
+enum {
+ R600_QUERY_HW_FLAG_NO_START = (1 << 0),
+ R600_QUERY_HW_FLAG_TIMER = (1 << 1),
+ R600_QUERY_HW_FLAG_PREDICATE = (1 << 2),
+};
+
+struct r600_query_hw_ops {
+ void (*prepare_buffer)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *);
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
+ void (*add_result)(struct r600_common_context *ctx,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+};
+
+struct r600_query_buffer {
+ /* The buffer where query results are stored. */
+ struct r600_resource *buf;
+ /* Offset of the next free result after current query data */
+ unsigned results_end;
+ /* If a query buffer is full, a new buffer is created and the old one
+ * is put in here. When we calculate the result, we sum up the samples
+ * from all buffers. */
+ struct r600_query_buffer *previous;
+};
+
+struct r600_query_hw {
+ struct r600_query b;
+ struct r600_query_hw_ops *ops;
+ unsigned flags;
+
+ /* The query buffer and how many results are in it. */
+ struct r600_query_buffer buffer;
+ /* Size of the result in memory for both begin_query and end_query,
+ * this can be one or two numbers, or it could even be a size of a structure. */
+ unsigned result_size;
+ /* The number of dwords for begin_query or end_query. */
+ unsigned num_cs_dw_begin;
+ unsigned num_cs_dw_end;
+ /* Linked list of queries */
+ struct list_head list;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
+};
+
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+ struct r600_query_hw *query);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait,
+ union pipe_query_result *result);
+
+#endif /* R600_QUERY_H */
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 0dac6fbbdce..8a60441c056 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -49,6 +49,7 @@
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
/**
* flush commands to the hardware
@@ -405,7 +406,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->use_vm = true;
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
- if (rscreen->info.family >= CHIP_TONGA)
+ if (rscreen->info.family >= CHIP_TONGA &&
+ rscreen->info.family != CHIP_STONEY)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
if ((rscreen->info.family >= CHIP_TONGA) &&
@@ -478,6 +480,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
radeon_vce_50_init(enc);
break;
+ case FW_52_0_3:
+ radeon_vce_52_init(enc);
+ break;
+
default:
goto error;
}
@@ -500,11 +506,17 @@ error:
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
- return rscreen->info.vce_fw_version == FW_40_2_2 ||
- rscreen->info.vce_fw_version == FW_50_0_1 ||
- rscreen->info.vce_fw_version == FW_50_1_2 ||
- rscreen->info.vce_fw_version == FW_50_10_2 ||
- rscreen->info.vce_fw_version == FW_50_17_3;
+ switch (rscreen->info.vce_fw_version) {
+ case FW_40_2_2:
+ case FW_50_0_1:
+ case FW_50_1_2:
+ case FW_50_10_2:
+ case FW_50_17_3:
+ case FW_52_0_3:
+ return true;
+ default:
+ return false;
+ }
}
/**
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 624bda479f8..25e2133521f 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -140,4 +140,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
void radeon_vce_50_init(struct rvce_encoder *enc);
+/* init vce fw 52 specific callbacks */
+void radeon_vce_52_init(struct rvce_encoder *enc);
+
#endif
diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c
new file mode 100644
index 00000000000..fbae1f97f41
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -0,0 +1,242 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+
+static void create(struct rvce_encoder *enc)
+{
+ enc->task_info(enc, 0x00000000, 0, 0, 0);
+
+ RVCE_BEGIN(0x01000001); // create cmd
+ RVCE_CS(0x00000000); // encUseCircularBuffer
+ RVCE_CS(profiles[enc->base.profile -
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
+ RVCE_CS(enc->base.level); // encLevel
+ RVCE_CS(0x00000000); // encPicStructRestriction
+ RVCE_CS(enc->base.width); // encImageWidth
+ RVCE_CS(enc->base.height); // encImageHeight
+ RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
+ RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
+ RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
+ RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
+
+ RVCE_CS(0x00000000); // encPreEncodeContextBufferOffset
+ RVCE_CS(0x00000000); // encPreEncodeInputLumaBufferOffset
+ RVCE_CS(0x00000000); // encPreEncodeInputChromaBufferOffs
+ RVCE_CS(0x00000000); // encPreEncodeMode|ChromaFlag|VBAQMode|SceneChangeSensitivity
+ RVCE_END();
+}
+
+static void encode(struct rvce_encoder *enc)
+{
+ signed luma_offset, chroma_offset, bs_offset;
+ unsigned dep, bs_idx = enc->bs_idx++;
+ int i;
+
+ if (enc->dual_inst) {
+ if (bs_idx == 0)
+ dep = 1;
+ else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ dep = 0;
+ else
+ dep = 2;
+ } else
+ dep = 0;
+
+ enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
+
+ RVCE_BEGIN(0x05000001); // context buffer
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
+ RVCE_END();
+
+ bs_offset = -(signed)(bs_idx * enc->bs_size);
+
+ RVCE_BEGIN(0x05000004); // video bitstream buffer
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
+ RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+ RVCE_END();
+
+ if (enc->dual_pipe) {
+ unsigned aux_offset = enc->cpb.res->buf->size -
+ RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+ RVCE_BEGIN(0x05000002); // auxiliary buffer
+ for (i = 0; i < 8; ++i) {
+ RVCE_CS(aux_offset);
+ aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+ }
+ for (i = 0; i < 8; ++i)
+ RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+ RVCE_END();
+ }
+
+ RVCE_BEGIN(0x03000001); // encode
+ RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+ RVCE_CS(0x00000000); // forceRefreshMap
+ RVCE_CS(0x00000000); // insertAUD
+ RVCE_CS(0x00000000); // endOfSequence
+ RVCE_CS(0x00000000); // endOfStream
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+ if (enc->dual_pipe)
+ RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ else
+ RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ RVCE_CS(0x00000000); // encInputPicTileConfig
+ RVCE_CS(enc->pic.picture_type); // encPicType
+ RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+ RVCE_CS(0x00000000); // encIdrPicId
+ RVCE_CS(0x00000000); // encMGSKeyPic
+ RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
+ RVCE_CS(0x00000000); // encTemporalLayerIndex
+ RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
+ RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
+ RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
+
+ i = enc->pic.frame_num - enc->pic.ref_idx_l0;
+ if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+ RVCE_CS(0x00000001); // encRefListModificationOp
+ RVCE_CS(i - 1); // encRefListModificationNum
+ } else {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+
+ for (i = 0; i < 3; ++i) {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+ for (i = 0; i < 4; ++i) {
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
+ }
+
+ // encReferencePictureL0[0]
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+ enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l0 = l0_slot(enc);
+ rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ RVCE_CS(l0->picture_type); // encPicType
+ RVCE_CS(l0->frame_num); // frameNumber
+ RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+
+ // encReferencePictureL0[1]
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+
+ // encReferencePictureL1[0]
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l1 = l1_slot(enc);
+ rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ RVCE_CS(l1->picture_type); // encPicType
+ RVCE_CS(l1->frame_num); // frameNumber
+ RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+
+ rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ RVCE_CS(luma_offset); // encReconstructedLumaOffset
+ RVCE_CS(chroma_offset); // encReconstructedChromaOffset
+ RVCE_CS(0x00000000); // encColocBufferOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // pictureCount
+ RVCE_CS(enc->pic.frame_num); // frameNumber
+ RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
+ RVCE_CS(0x00000000); // numIPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numPPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numBPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
+ RVCE_CS(0x00000000); // enableIntraRefresh
+
+ RVCE_CS(0x00000000); // aq_variance_en
+ RVCE_CS(0x00000000); // aq_block_size
+ RVCE_CS(0x00000000); // aq_mb_variance_sel
+ RVCE_CS(0x00000000); // aq_frame_variance_sel
+ RVCE_CS(0x00000000); // aq_param_a
+ RVCE_CS(0x00000000); // aq_param_b
+ RVCE_CS(0x00000000); // aq_param_c
+ RVCE_CS(0x00000000); // aq_param_d
+ RVCE_CS(0x00000000); // aq_param_e
+
+ RVCE_CS(0x00000000); // contextInSFB
+ RVCE_END();
+}
+
+void radeon_vce_52_init(struct rvce_encoder *enc)
+{
+ radeon_vce_50_init(enc);
+
+ enc->create = create;
+ enc->encode = encode;
+}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 93847d5ec2f..209b940aa11 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3256,25 +3256,34 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- /* GRBM_GFX_INDEX is privileged on VI */
- if (sctx->b.chip_class <= CIK)
+ /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+ if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
SE_INDEX(se) | SH_BROADCAST_WRITES |
INSTANCE_BROADCAST_WRITES);
+ else
+ si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
- /* GRBM_GFX_INDEX is privileged on VI */
- if (sctx->b.chip_class <= CIK)
+ /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+ if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
INSTANCE_BROADCAST_WRITES);
+ else
+ si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
}
static void si_init_config(struct si_context *sctx)
{
+ struct si_screen *sscreen = sctx->screen;
unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
unsigned raster_config, raster_config_1;
@@ -3345,9 +3354,14 @@ static void si_init_config(struct si_context *sctx)
raster_config_1 = 0x0000002e;
break;
case CHIP_FIJI:
- /* Fiji should be same as Hawaii, but that causes corruption in some cases */
- raster_config = 0x16000012; /* 0x3a00161a */
- raster_config_1 = 0x0000002a; /* 0x0000002e */
+ if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
+ /* old kernels with old tiling config */
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ } else {
+ raster_config = 0x3a00161a;
+ raster_config_1 = 0x0000002e;
+ }
break;
case CHIP_TONGA:
raster_config = 0x16000012;
diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc
index 6455f3caa3d..bd3c2eead16 100644
--- a/src/gallium/drivers/softpipe/Automake.inc
+++ b/src/gallium/drivers/softpipe/Automake.inc
@@ -3,13 +3,10 @@ if HAVE_GALLIUM_SOFTPIPE
TARGET_DRIVERS += swrast
TARGET_CPPFLAGS += -DGALLIUM_SOFTPIPE
TARGET_LIB_DEPS += \
- $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \
$(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la
-if HAVE_DRI2
+if HAVE_DRISW_KMS
TARGET_DRIVERS += kms_swrast
-TARGET_LIB_DEPS += \
- $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la
endif
endif
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index bcce18a3502..6a4f9d8d076 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -51,14 +51,16 @@
#define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3)
#define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4)
#define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+
/* running total counters */
-#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11)
/*SVGA_QUERY_MAX has to be last because it is size of an array*/
-#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 12)
/**
* Maximum supported number of constant buffers per shader
@@ -485,6 +487,7 @@ struct svga_context
uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+ uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
} hud;
/** The currently bound stream output targets */
diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 28b8064bf70..2b549dfa5bb 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -53,17 +53,17 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, 0 },
- { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A4R4G4B4, 0 },
+ { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, 0 },
{ PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, 0 },
- { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_LUMINANCE8, 0 },
+ { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, 0 },
{ PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
- { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_Z_D16, 0 },
+ { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 },
{ PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 },
{ PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 },
@@ -152,14 +152,14 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, 0 },
- { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
- { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
- { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
- { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
+ { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 },
+ { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 },
+ { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 },
+ { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 },
+ { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 },
{ PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 },
{ PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 },
{ PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 },
@@ -326,6 +326,34 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
};
@@ -472,7 +500,7 @@ struct format_cap {
* PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT
* for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling.
* If we want to query if a format supports both rendering and sampling the
- * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and
+ * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and
* SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those
* formats and report that both can do rendering and sampling.
*/
@@ -1410,27 +1438,50 @@ static const struct format_cap format_cap_table[] = {
},
{
"SVGA3D_BC1_TYPELESS",
- SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC1_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC1_UNORM_SRGB",
- SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC1_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_TYPELESS",
- SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC2_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_UNORM_SRGB",
- SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC2_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_TYPELESS",
- SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC3_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_UNORM_SRGB",
- SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC3_UNORM_SRGB,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC4_TYPELESS",
@@ -1671,7 +1722,7 @@ static const struct format_cap format_cap_table[] = {
{
"SVGA3D_D16_UNORM",
SVGA3D_D16_UNORM,
- SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+ 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/
1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
@@ -1690,15 +1741,27 @@ static const struct format_cap format_cap_table[] = {
},
{
"SVGA3D_BC1_UNORM",
- SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC1_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_UNORM",
- SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC2_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_UNORM",
- SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC3_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC3_UNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_B5G6R5_UNORM",
@@ -2053,6 +2116,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R8G8_UINT:
case SVGA3D_R8G8_SINT:
return SVGA3D_R8G8_TYPELESS;
+ case SVGA3D_D16_UNORM:
case SVGA3D_R16_UNORM:
case SVGA3D_R16_UINT:
case SVGA3D_R16_SNORM:
@@ -2070,6 +2134,15 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_B8G8R8X8_UNORM_SRGB:
case SVGA3D_B8G8R8X8_UNORM:
return SVGA3D_B8G8R8X8_TYPELESS;
+ case SVGA3D_BC1_UNORM:
+ case SVGA3D_BC1_UNORM_SRGB:
+ return SVGA3D_BC1_TYPELESS;
+ case SVGA3D_BC2_UNORM:
+ case SVGA3D_BC2_UNORM_SRGB:
+ return SVGA3D_BC2_TYPELESS;
+ case SVGA3D_BC3_UNORM:
+ case SVGA3D_BC3_UNORM_SRGB:
+ return SVGA3D_BC3_TYPELESS;
case SVGA3D_BC4_UNORM:
case SVGA3D_BC4_SNORM:
return SVGA3D_BC4_TYPELESS;
@@ -2079,18 +2152,10 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
/* Special cases (no corresponding _TYPELESS formats) */
case SVGA3D_A8_UNORM:
- case SVGA3D_A4R4G4B4:
case SVGA3D_B5G5R5A1_UNORM:
case SVGA3D_B5G6R5_UNORM:
- case SVGA3D_DXT1:
- case SVGA3D_DXT2:
- case SVGA3D_DXT3:
- case SVGA3D_DXT4:
- case SVGA3D_DXT5:
case SVGA3D_R11G11B10_FLOAT:
case SVGA3D_R9G9B9E5_SHAREDEXP:
- case SVGA3D_Z_D32:
- case SVGA3D_Z_D16:
return format;
default:
debug_printf("Unexpected format %s in %s\n",
@@ -2098,3 +2163,26 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
return format;
}
}
+
+
+/**
+ * Given a surface format, return the corresponding format to use for
+ * a texture sampler. In most cases, it's the format unchanged, but there
+ * are some special cases.
+ */
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format)
+{
+ switch (format) {
+ case SVGA3D_D16_UNORM:
+ return SVGA3D_R16_UNORM;
+ case SVGA3D_D24_UNORM_S8_UINT:
+ return SVGA3D_R24_UNORM_X8_TYPELESS;
+ case SVGA3D_D32_FLOAT:
+ return SVGA3D_R32_FLOAT;
+ case SVGA3D_D32_FLOAT_S8X24_UINT:
+ return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+ default:
+ return format;
+ }
+}
diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h
index 0af218cb01a..9f9a530d473 100644
--- a/src/gallium/drivers/svga/svga_format.h
+++ b/src/gallium/drivers/svga/svga_format.h
@@ -93,4 +93,8 @@ SVGA3dSurfaceFormat
svga_typeless_format(SVGA3dSurfaceFormat format);
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format);
+
+
#endif /* SVGA_FORMAT_H_ */
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 8b9818334ca..5416a009dcb 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -731,6 +731,7 @@ svga_create_query(struct pipe_context *pipe,
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
break;
default:
assert(!"unexpected query type in svga_create_query()");
@@ -797,6 +798,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
/* nothing */
break;
default:
@@ -876,6 +878,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
sq->begin_count = svga->hud.num_resources_mapped;
break;
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
+ sq->begin_count = svga->hud.num_bytes_uploaded;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -966,6 +971,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
sq->end_count = svga->hud.num_resources_mapped;
break;
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
+ sq->end_count = svga->hud.num_bytes_uploaded;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1061,6 +1069,7 @@ svga_get_query_result(struct pipe_context *pipe,
case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_NUM_VALIDATIONS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
case SVGA_QUERY_MAP_BUFFER_TIME:
vresult->u64 = sq->end_count - sq->begin_count;
break;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 71f2f4f2779..449cc149a81 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -80,6 +80,11 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
uint8_t *map;
int64_t begin = os_time_get();
+ assert(box->y == 0);
+ assert(box->z == 0);
+ assert(box->height == 1);
+ assert(box->depth == 1);
+
transfer = CALLOC_STRUCT(pipe_transfer);
if (transfer == NULL) {
return NULL;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 69e5f75e208..8c5cff5abc1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -429,6 +429,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x <= sbuf->b.b.width0);
assert(box->x + box->w <= sbuf->b.b.width0);
+
+ svga->hud.num_bytes_uploaded += box->w;
}
}
else {
@@ -454,6 +456,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x <= sbuf->b.b.width0);
assert(box->x + box->w <= sbuf->b.b.width0);
+
+ svga->hud.num_bytes_uploaded += box->w;
}
}
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index a02d1e495ff..81594777258 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -380,6 +380,12 @@ svga_texture_transfer_map(struct pipe_context *pipe,
break;
}
+ if (usage & PIPE_TRANSFER_WRITE) {
+ /* record texture upload for HUD */
+ svga->hud.num_bytes_uploaded +=
+ nblocksx * nblocksy * d * util_format_get_blocksize(texture->format);
+ }
+
if (!use_direct_map) {
/* Use a DMA buffer */
st->hw_nblocksy = nblocksy;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index a80bc9b9119..09a3d33552b 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -789,6 +789,8 @@ svga_get_driver_query_info(struct pipe_screen *screen,
{"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0},
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS},
{"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}},
+ {"num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, {0},
+ PIPE_DRIVER_QUERY_TYPE_BYTES, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE},
/* running total counters */
{"memory-used", SVGA_QUERY_MEMORY_USED, {0},
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index 611d2c6102f..c5d52bbfd14 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -108,6 +108,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
PIPE_BIND_SAMPLER_VIEW);
assert(format != SVGA3D_FORMAT_INVALID);
+ /* Convert the format to a sampler-friendly format, if needed */
+ format = svga_sampler_format(format);
+
if (texture->target == PIPE_BUFFER) {
viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 8b02680c77e..62a51e9a94d 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -456,9 +456,6 @@ trace_screen_create(struct pipe_screen *screen)
{
struct trace_screen *tr_scr;
- if(!screen)
- goto error1;
-
if (!trace_enabled())
goto error1;
diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc
index 6fa3e190cac..5664c2ab14e 100644
--- a/src/gallium/drivers/vc4/Automake.inc
+++ b/src/gallium/drivers/vc4/Automake.inc
@@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
$(top_builddir)/src/gallium/drivers/vc4/libvc4.la
-if USE_VC4_SIMULATOR
-TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR
-endif
-
endif
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index f4a57ba3404..a3bf72fc72a 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -23,7 +23,6 @@ include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
if USE_VC4_SIMULATOR
-SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
SIM_LDFLAGS = -lsimpenrose
endif
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 373c9e12d11..0672a92226f 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
nir_ssa_def *src_color[4], *unpacked_dst_color[4];
for (unsigned i = 0; i < 4; i++) {
- src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
- unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+ src_color[i] = nir_channel(b, intr->src[0].ssa, i);
+ unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
}
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 7ea263afb68..1afe52a63f4 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
static nir_ssa_def *
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
- return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
+ return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
}
static nir_ssa_def *
@@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
assert(intr->src[0].is_ssa);
- intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
- intr->src[0].ssa,
- &i, 1, false));
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i));
nir_builder_instr_insert(b, &intr_comp->instr);
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index f1bab810eff..07a92266dd2 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c)
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
if (is_zero(c, inst->src[1])) {
/* Replace references to a 0 uniform value
* with the SEL_X_0 equivalent.
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a48dad804e2..197577b6c20 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
qir_SF(c, qir_SUB(c, src[0], src[1]));
*dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
break;
+ case nir_op_uge:
+ qir_SF(c, qir_SUB(c, src[0], src[1]));
+ *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
+ break;
case nir_op_ilt:
qir_SF(c, qir_SUB(c, src[0], src[1]));
*dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
@@ -1167,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c)
struct qreg point_size;
if (c->output_point_size_index != -1)
- point_size = c->outputs[c->output_point_size_index + 3];
+ point_size = c->outputs[c->output_point_size_index];
else
point_size = qir_uniform_f(c, 1.0);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 7894b081b19..f2855e159fc 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
[QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
[QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+ [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
+ [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
[QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
[QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
[QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
[QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+ [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
+ [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },
[QOP_RCP] = { "rcp", 1, 1, false, true },
[QOP_RSQ] = { "rsq", 1, 1, false, true },
@@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst)
case QOP_SEL_X_0_NC:
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
+ case QOP_SEL_X_0_CS:
+ case QOP_SEL_X_0_CC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
case QOP_SEL_X_Y_ZS:
case QOP_SEL_X_Y_ZC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index a92ad93ee07..ddb35e41fcf 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -99,11 +99,15 @@ enum qop {
QOP_SEL_X_0_ZC,
QOP_SEL_X_0_NS,
QOP_SEL_X_0_NC,
+ QOP_SEL_X_0_CS,
+ QOP_SEL_X_0_CC,
/* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
QOP_SEL_X_Y_ZS,
QOP_SEL_X_Y_ZC,
QOP_SEL_X_Y_NS,
QOP_SEL_X_Y_NC,
+ QOP_SEL_X_Y_CS,
+ QOP_SEL_X_Y_CC,
QOP_FTOI,
QOP_ITOF,
@@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
QIR_ALU1(SEL_X_0_NS)
QIR_ALU1(SEL_X_0_NC)
+QIR_ALU1(SEL_X_0_CS)
+QIR_ALU1(SEL_X_0_CC)
QIR_ALU2(SEL_X_Y_ZS)
QIR_ALU2(SEL_X_Y_ZC)
QIR_ALU2(SEL_X_Y_NS)
QIR_ALU2(SEL_X_Y_NC)
+QIR_ALU2(SEL_X_Y_CS)
+QIR_ALU2(SEL_X_Y_CC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index f087c3b81b5..a57e100593c 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -22,14 +22,10 @@
*/
/**
- * @file vc4_opt_algebraic.c
+ * @file vc4_qir_lower_uniforms.c
*
- * This is the optimization pass for miscellaneous changes to instructions
- * where we can simplify the operation by some knowledge about the specific
- * operations.
- *
- * Mostly this will be a matter of turning things into MOVs so that they can
- * later be copy-propagated out.
+ * This is the pre-code-generation pass for fixing up instructions that try to
+ * read from multiple uniform values.
*/
#include "vc4_qir.h"
@@ -85,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i)
return true;
}
+/* Returns the number of different uniform values referenced by the
+ * instruction.
+ */
+static uint32_t
+qir_get_instruction_uniform_count(struct qinst *inst)
+{
+ uint32_t count = 0;
+
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file != QFILE_UNIF)
+ continue;
+
+ bool is_duplicate = false;
+ for (int j = 0; j < i; j++) {
+ if (inst->src[j].file == QFILE_UNIF &&
+ inst->src[j].index == inst->src[i].index) {
+ is_duplicate = true;
+ break;
+ }
+ }
+ if (!is_duplicate)
+ count++;
+ }
+
+ return count;
+}
+
void
qir_lower_uniforms(struct vc4_compile *c)
{
@@ -98,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
- uint32_t count = 0;
- for (int i = 0; i < nsrc; i++) {
- if (inst->src[i].file == QFILE_UNIF)
- count++;
- }
-
- if (count <= 1)
+ if (qir_get_instruction_uniform_count(inst) <= 1)
continue;
for (int i = 0; i < nsrc; i++) {
@@ -140,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
- uint32_t count = 0;
- for (int i = 0; i < nsrc; i++) {
- if (inst->src[i].file == QFILE_UNIF)
- count++;
- }
+ uint32_t count = qir_get_instruction_uniform_count(inst);
if (count <= 1)
continue;
+ bool removed = false;
for (int i = 0; i < nsrc; i++) {
if (is_lowerable_uniform(inst, i) &&
inst->src[i].index == max_index) {
inst->src[i] = temp;
remove_uniform(ht, unif);
- count--;
+ removed = true;
}
}
+ if (removed)
+ count--;
/* If the instruction doesn't need lowering any more,
* then drop it from the list.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 133e1385178..e0d3633da42 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
case QOP_SEL_X_0_NC:
+ case QOP_SEL_X_0_CS:
+ case QOP_SEL_X_0_CC:
queue(c, qpu_a_MOV(dst, src[0]) | unpack);
set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
QPU_COND_ZS);
@@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
queue(c, qpu_a_MOV(dst, src[0]));
if (qinst->src[0].pack)
*(last_inst(c)) |= unpack;
diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
index 7f11fba2340..85a0c95e851 100644
--- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
@@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c)
uint32_t next_uniform = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ uint32_t new = ~0;
+
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
if (inst->src[i].file != QFILE_UNIF)
continue;
- uint32_t new = next_uniform++;
- if (uniform_index_size <= new) {
- uniform_index_size =
- MAX2(uniform_index_size * 2, 16);
- uniform_index =
- realloc(uniform_index,
- uniform_index_size *
- sizeof(uint32_t));
+ if (new == ~0) {
+ new = next_uniform++;
+ if (uniform_index_size <= new) {
+ uniform_index_size =
+ MAX2(uniform_index_size * 2, 16);
+ uniform_index =
+ realloc(uniform_index,
+ uniform_index_size *
+ sizeof(uint32_t));
+ }
+ } else {
+ /* If we've got two uniform references in this
+ * instruction, they need to be the same
+ * uniform value.
+ */
+ assert(inst->src[i].index == uniform_index[new]);
}
uniform_index[new] = inst->src[i].index;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 27f358f8fb9..be7447de67d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
unsigned query_type,
unsigned index );
+ /**
+ * Create a query object that queries all given query types simultaneously.
+ *
+ * This can only be used for those query types for which
+ * get_driver_query_info indicates that it must be used. Only one batch
+ * query object may be active at a time.
+ *
+ * There may be additional constraints on which query types can be used
+ * together, in particular those that are implied by
+ * get_driver_query_group_info.
+ *
+ * \param num_queries the number of query types
+ * \param query_types array of \p num_queries query types
+ * \return a query object, or NULL on error.
+ */
+ struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
+
void (*destroy_query)(struct pipe_context *pipe,
struct pipe_query *q);
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 7240154727e..b3c8b9f7360 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
};
/**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+ uint64_t u64;
+ uint32_t u32;
+ float f;
+};
+
+/**
* Query result (returned by pipe_context::get_query_result).
*/
union pipe_query_result
@@ -791,6 +801,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+ /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+ /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
@@ -809,6 +821,9 @@ union pipe_query_result
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+ /* batch queries */
+ union pipe_numeric_type_union batch[0];
};
union pipe_color_union
@@ -829,12 +844,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ = 6,
};
-enum pipe_driver_query_group_type
-{
- PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
- PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
/* Whether an average value per frame or a cumulative value should be
* displayed.
*/
@@ -844,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
};
-union pipe_numeric_type_union
-{
- uint64_t u64;
- uint32_t u32;
- float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
struct pipe_driver_query_info
{
@@ -859,12 +869,12 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+ unsigned flags;
};
struct pipe_driver_query_group_info
{
const char *name;
- enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
};
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 5f0690e5ae6..d9c9f9b5cc2 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -359,6 +359,36 @@ enum pipe_format {
PIPE_FORMAT_ETC2_RG11_UNORM = 277,
PIPE_FORMAT_ETC2_RG11_SNORM = 278,
+ PIPE_FORMAT_ASTC_4x4 = 279,
+ PIPE_FORMAT_ASTC_5x4 = 280,
+ PIPE_FORMAT_ASTC_5x5 = 281,
+ PIPE_FORMAT_ASTC_6x5 = 282,
+ PIPE_FORMAT_ASTC_6x6 = 283,
+ PIPE_FORMAT_ASTC_8x5 = 284,
+ PIPE_FORMAT_ASTC_8x6 = 285,
+ PIPE_FORMAT_ASTC_8x8 = 286,
+ PIPE_FORMAT_ASTC_10x5 = 287,
+ PIPE_FORMAT_ASTC_10x6 = 288,
+ PIPE_FORMAT_ASTC_10x8 = 289,
+ PIPE_FORMAT_ASTC_10x10 = 290,
+ PIPE_FORMAT_ASTC_12x10 = 291,
+ PIPE_FORMAT_ASTC_12x12 = 292,
+
+ PIPE_FORMAT_ASTC_4x4_SRGB = 293,
+ PIPE_FORMAT_ASTC_5x4_SRGB = 294,
+ PIPE_FORMAT_ASTC_5x5_SRGB = 295,
+ PIPE_FORMAT_ASTC_6x5_SRGB = 296,
+ PIPE_FORMAT_ASTC_6x6_SRGB = 297,
+ PIPE_FORMAT_ASTC_8x5_SRGB = 298,
+ PIPE_FORMAT_ASTC_8x6_SRGB = 299,
+ PIPE_FORMAT_ASTC_8x8_SRGB = 300,
+ PIPE_FORMAT_ASTC_10x5_SRGB = 301,
+ PIPE_FORMAT_ASTC_10x6_SRGB = 302,
+ PIPE_FORMAT_ASTC_10x8_SRGB = 303,
+ PIPE_FORMAT_ASTC_10x10_SRGB = 304,
+ PIPE_FORMAT_ASTC_12x10_SRGB = 305,
+ PIPE_FORMAT_ASTC_12x12_SRGB = 306,
+
PIPE_FORMAT_COUNT
};
diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h
index 740c4bbe1a6..959a7625e30 100644
--- a/src/gallium/include/state_tracker/drm_driver.h
+++ b/src/gallium/include/state_tracker/drm_driver.h
@@ -117,10 +117,4 @@ struct drm_driver_descriptor driver_descriptor = { \
.configuration = (conf), \
};
-extern struct pipe_screen *dd_create_screen(int fd);
-
-extern const char *dd_driver_name(void);
-
-extern const struct drm_conf_ret *dd_configuration(enum drm_conf conf);
-
#endif
diff --git a/src/gallium/include/state_tracker/sw_driver.h b/src/gallium/include/state_tracker/sw_driver.h
new file mode 100644
index 00000000000..0eb2b44d6fd
--- /dev/null
+++ b/src/gallium/include/state_tracker/sw_driver.h
@@ -0,0 +1,21 @@
+
+#ifndef _SW_DRIVER_H_
+#define _SW_DRIVER_H_
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+struct sw_winsys;
+
+struct sw_driver_descriptor
+{
+ struct pipe_screen *(*create_screen)(struct sw_winsys *ws);
+ struct {
+ const char * const name;
+ struct sw_winsys *(*create_winsys)();
+ } winsys[];
+};
+
+extern struct sw_driver_descriptor swrast_driver_descriptor;
+
+#endif
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am
index fd0ccf88cc5..3c9421692fc 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -1,8 +1,6 @@
include Makefile.sources
AM_CPPFLAGS = \
- $(GALLIUM_PIPE_LOADER_DEFINES) \
- -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 6efff79c7f4..1be2f6413f4 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -41,7 +41,7 @@ namespace {
device::device(clover::platform &platform, pipe_loader_device *ldev) :
platform(platform), ldev(ldev) {
- pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR);
+ pipe = pipe_loader_create_screen(ldev);
if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
if (pipe)
pipe->destroy(pipe);
diff --git a/src/gallium/state_trackers/dri/Android.mk b/src/gallium/state_trackers/dri/Android.mk
index 43f0de9b464..f0eb18dcacf 100644
--- a/src/gallium/state_trackers/dri/Android.mk
+++ b/src/gallium/state_trackers/dri/Android.mk
@@ -29,9 +29,6 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(common_SOURCES)
-LOCAL_CFLAGS := \
- -DGALLIUM_STATIC_TARGETS=1 \
-
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa \
diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am
index 9f4deba0c1e..74bccaa6416 100644
--- a/src/gallium/state_trackers/dri/Makefile.am
+++ b/src/gallium/state_trackers/dri/Makefile.am
@@ -25,8 +25,6 @@ include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
AM_CPPFLAGS = \
- $(GALLIUM_PIPE_LOADER_DEFINES) \
- -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
@@ -36,15 +34,10 @@ AM_CPPFLAGS = \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS += \
- -DGALLIUM_STATIC_TARGETS=1
-
if HAVE_GALLIUM_SOFTPIPE
AM_CPPFLAGS += \
-DGALLIUM_SOFTPIPE
endif # HAVE_GALLIUM_SOFTPIPE
-endif # HAVE_GALLIUM_STATIC_TARGETS
noinst_LTLIBRARIES = libdri.la
libdri_la_SOURCES = $(common_SOURCES)
diff --git a/src/gallium/state_trackers/dri/SConscript b/src/gallium/state_trackers/dri/SConscript
index 657300baf13..fa48fb8a0d7 100644
--- a/src/gallium/state_trackers/dri/SConscript
+++ b/src/gallium/state_trackers/dri/SConscript
@@ -15,10 +15,6 @@ env.Append(CPPPATH = [
xmlpool_options.dir.dir, # Dir to generated xmlpool/options.h
])
-env.Append(CPPDEFINES = [
- ('GALLIUM_STATIC_TARGETS', '1'),
-])
-
sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES')
# XXX: if HAVE_DRISW
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 019414b56fe..beb0866c83f 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1444,8 +1444,8 @@ dri2_init_screen(__DRIscreen * sPriv)
const __DRIconfig **configs;
struct dri_screen *screen;
struct pipe_screen *pscreen = NULL;
- const struct drm_conf_ret *throttle_ret = NULL;
- const struct drm_conf_ret *dmabuf_ret = NULL;
+ const struct drm_conf_ret *throttle_ret;
+ const struct drm_conf_ret *dmabuf_ret;
screen = CALLOC_STRUCT(dri_screen);
if (!screen)
@@ -1457,19 +1457,14 @@ dri2_init_screen(__DRIscreen * sPriv)
sPriv->driverPrivate = (void *)screen;
-#if GALLIUM_STATIC_TARGETS
- pscreen = dd_create_screen(screen->fd);
+ if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd)))
+ pscreen = pipe_loader_create_screen(screen->dev);
- throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
- dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
-#else
- if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
- pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR);
+ if (!pscreen)
+ goto fail;
- throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
- dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
- }
-#endif // GALLIUM_STATIC_TARGETS
+ throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
+ dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
if (throttle_ret && throttle_ret->val.val_int != -1) {
screen->throttling_enabled = TRUE;
@@ -1486,20 +1481,14 @@ dri2_init_screen(__DRIscreen * sPriv)
}
}
- if (pscreen && pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) {
+ if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) {
sPriv->extensions = dri_robust_screen_extensions;
screen->has_reset_status_query = true;
}
else
sPriv->extensions = dri_screen_extensions;
- /* dri_init_screen_helper checks pscreen for us */
-
-#if GALLIUM_STATIC_TARGETS
- configs = dri_init_screen_helper(screen, pscreen, dd_driver_name());
-#else
configs = dri_init_screen_helper(screen, pscreen, screen->dev->driver_name);
-#endif // GALLIUM_STATIC_TARGETS
if (!configs)
goto fail;
@@ -1511,10 +1500,8 @@ dri2_init_screen(__DRIscreen * sPriv)
return configs;
fail:
dri_destroy_screen_helper(screen);
-#if !GALLIUM_STATIC_TARGETS
if (screen->dev)
pipe_loader_release(&screen->dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
FREE(screen);
return NULL;
}
@@ -1527,7 +1514,6 @@ fail:
static const __DRIconfig **
dri_kms_init_screen(__DRIscreen * sPriv)
{
-#if GALLIUM_STATIC_TARGETS
#if defined(GALLIUM_SOFTPIPE)
const __DRIconfig **configs;
struct dri_screen *screen;
@@ -1543,7 +1529,11 @@ dri_kms_init_screen(__DRIscreen * sPriv)
sPriv->driverPrivate = (void *)screen;
- pscreen = kms_swrast_create_screen(screen->fd);
+ if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd)))
+ pscreen = pipe_loader_create_screen(screen->dev);
+
+ if (!pscreen)
+ goto fail;
if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 &&
(cap & DRM_PRIME_CAP_IMPORT)) {
@@ -1553,7 +1543,6 @@ dri_kms_init_screen(__DRIscreen * sPriv)
sPriv->extensions = dri_screen_extensions;
- /* dri_init_screen_helper checks pscreen for us */
configs = dri_init_screen_helper(screen, pscreen, "swrast");
if (!configs)
goto fail;
@@ -1566,9 +1555,10 @@ dri_kms_init_screen(__DRIscreen * sPriv)
return configs;
fail:
dri_destroy_screen_helper(screen);
+ if (screen->dev)
+ pipe_loader_release(&screen->dev, 1);
FREE(screen);
#endif // GALLIUM_SOFTPIPE
-#endif // GALLIUM_STATIC_TARGETS
return NULL;
}
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index c4c2d9c8fb1..2ac55c88926 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -390,9 +390,7 @@ dri_destroy_screen(__DRIscreen * sPriv)
dri_destroy_screen_helper(screen);
-#if !GALLIUM_STATIC_TARGETS
pipe_loader_release(&screen->dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
free(screen);
sPriv->driverPrivate = NULL;
@@ -416,11 +414,6 @@ dri_init_screen_helper(struct dri_screen *screen,
const char* driver_name)
{
screen->base.screen = pscreen;
- if (!screen->base.screen) {
- debug_printf("%s: failed to create pipe_screen\n", __FUNCTION__);
- return NULL;
- }
-
screen->base.get_egl_image = dri_get_egl_image;
screen->base.get_param = dri_get_param;
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 753c59d696a..b85a73c57d2 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -39,6 +39,7 @@
#include "util/u_inlines.h"
#include "util/u_box.h"
#include "pipe/p_context.h"
+#include "pipe-loader/pipe_loader.h"
#include "state_tracker/drisw_api.h"
#include "state_tracker/st_context.h"
@@ -382,7 +383,7 @@ drisw_init_screen(__DRIscreen * sPriv)
{
const __DRIconfig **configs;
struct dri_screen *screen;
- struct pipe_screen *pscreen;
+ struct pipe_screen *pscreen = NULL;
screen = CALLOC_STRUCT(dri_screen);
if (!screen)
@@ -396,8 +397,11 @@ drisw_init_screen(__DRIscreen * sPriv)
sPriv->driverPrivate = (void *)screen;
sPriv->extensions = drisw_screen_extensions;
- pscreen = drisw_create_screen(&drisw_lf);
- /* dri_init_screen_helper checks pscreen for us */
+ if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf))
+ pscreen = pipe_loader_create_screen(screen->dev);
+
+ if (!pscreen)
+ goto fail;
configs = dri_init_screen_helper(screen, pscreen, "swrast");
if (!configs)
@@ -406,6 +410,8 @@ drisw_init_screen(__DRIscreen * sPriv)
return configs;
fail:
dri_destroy_screen_helper(screen);
+ if (screen->dev)
+ pipe_loader_release(&screen->dev, 1);
FREE(screen);
return NULL;
}
diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c
index 7df90b16a84..da9ca104d93 100644
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -33,6 +33,7 @@
#include <assert.h>
#include <string.h>
+#include <stdbool.h>
#include <X11/Xlib.h>
@@ -73,29 +74,30 @@ int omx_component_library_Setup(stLoaderComponentType **stComponents)
struct vl_screen *omx_get_screen(void)
{
+ static bool first_time = true;
pipe_mutex_lock(omx_lock);
- if (!omx_display) {
- omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
- if (!omx_render_node) {
- omx_display = XOpenDisplay(NULL);
- if (!omx_display)
- goto error;
- }
- }
-
if (!omx_screen) {
+ if (first_time) {
+ omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
+ first_time = false;
+ }
if (omx_render_node) {
drm_fd = loader_open_device(omx_render_node);
if (drm_fd < 0)
goto error;
+
omx_screen = vl_drm_screen_create(drm_fd);
if (!omx_screen) {
close(drm_fd);
goto error;
}
} else {
- omx_screen = vl_screen_create(omx_display, 0);
+ omx_display = XOpenDisplay(NULL);
+ if (!omx_display)
+ goto error;
+
+ omx_screen = vl_dri2_screen_create(omx_display, 0);
if (!omx_screen) {
XCloseDisplay(omx_display);
goto error;
@@ -117,16 +119,13 @@ void omx_put_screen(void)
{
pipe_mutex_lock(omx_lock);
if ((--omx_usecount) == 0) {
- if (!omx_render_node) {
- vl_screen_destroy(omx_screen);
- if (omx_display)
- XCloseDisplay(omx_display);
- } else {
- close(drm_fd);
- vl_drm_screen_destroy(omx_screen);
- }
+ omx_screen->destroy(omx_screen);
omx_screen = NULL;
- omx_display = NULL;
+
+ if (omx_render_node)
+ close(drm_fd);
+ else
+ XCloseDisplay(omx_display);
}
pipe_mutex_unlock(omx_lock);
}
diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 98c4104da48..f0051e5f6a5 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -102,7 +102,6 @@ PUBLIC VAStatus
VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
{
vlVaDriver *drv;
- int drm_fd;
struct drm_state *drm_info;
if (!ctx)
@@ -119,26 +118,20 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
return VA_STATUS_ERROR_UNIMPLEMENTED;
case VA_DISPLAY_GLX:
case VA_DISPLAY_X11:
- drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen);
+ drv->vscreen = vl_dri2_screen_create(ctx->native_dpy, ctx->x11_screen);
if (!drv->vscreen)
goto error_screen;
break;
case VA_DISPLAY_DRM:
case VA_DISPLAY_DRM_RENDERNODES: {
drm_info = (struct drm_state *) ctx->drm_state;
- if (!drm_info) {
- FREE(drv);
- return VA_STATUS_ERROR_INVALID_PARAMETER;
- }
-
- drm_fd = drm_info->fd;
- if (drm_fd < 0) {
+ if (!drm_info || drm_info->fd < 0) {
FREE(drv);
return VA_STATUS_ERROR_INVALID_PARAMETER;
}
- drv->vscreen = vl_drm_screen_create(drm_fd);
+ drv->vscreen = vl_drm_screen_create(drm_info->fd);
if (!drv->vscreen)
goto error_screen;
}
@@ -182,10 +175,7 @@ error_htab:
drv->pipe->destroy(drv->pipe);
error_pipe:
- if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
- vl_screen_destroy(drv->vscreen);
- else
- vl_drm_screen_destroy(drv->vscreen);
+ drv->vscreen->destroy(drv->vscreen);
error_screen:
FREE(drv);
@@ -322,10 +312,7 @@ vlVaTerminate(VADriverContextP ctx)
vl_compositor_cleanup_state(&drv->cstate);
vl_compositor_cleanup(&drv->compositor);
drv->pipe->destroy(drv->pipe);
- if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
- vl_screen_destroy(drv->vscreen);
- else
- vl_drm_screen_destroy(drv->vscreen);
+ drv->vscreen->destroy(drv->vscreen);
handle_table_destroy(drv->htab);
FREE(drv);
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index 5e7841a0521..a37a9b791db 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -763,7 +763,7 @@ handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, v
dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width;
dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height;
- dirty_area = vl_screen_get_dirty_area(drv->vscreen);
+ dirty_area = drv->vscreen->get_dirty_area(drv->vscreen);
vl_compositor_clear_layers(&drv->cstate);
vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE);
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 589d6860b6a..c052c8f2284 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -229,6 +229,7 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
struct pipe_screen *screen;
struct pipe_resource *tex;
struct pipe_surface surf_templ, *surf_draw;
+ struct vl_screen *vscreen;
struct u_rect src_rect, *dirty_area;
struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth};
VAStatus status;
@@ -242,17 +243,18 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
return VA_STATUS_ERROR_INVALID_SURFACE;
screen = drv->pipe->screen;
+ vscreen = drv->vscreen;
if(surf->fence) {
screen->fence_finish(screen, surf->fence, PIPE_TIMEOUT_INFINITE);
screen->fence_reference(screen, &surf->fence, NULL);
}
- tex = vl_screen_texture_from_drawable(drv->vscreen, (Drawable)draw);
+ tex = vscreen->texture_from_drawable(vscreen, draw);
if (!tex)
return VA_STATUS_ERROR_INVALID_DISPLAY;
- dirty_area = vl_screen_get_dirty_area(drv->vscreen);
+ dirty_area = vscreen->get_dirty_area(vscreen);
memset(&surf_templ, 0, sizeof(surf_templ));
surf_templ.format = tex->format;
@@ -276,11 +278,8 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
if (status)
return status;
- screen->flush_frontbuffer
- (
- screen, tex, 0, 0,
- vl_screen_get_private(drv->vscreen), NULL
- );
+ screen->flush_frontbuffer(screen, tex, 0, 0,
+ vscreen->get_private(vscreen), NULL);
screen->fence_reference(screen, &surf->fence, NULL);
drv->pipe->flush(drv->pipe, &surf->fence, 0);
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 31c95054f56..c70cc6e2752 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -63,7 +63,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
pipe_reference_init(&dev->reference, 1);
- dev->vscreen = vl_screen_create(display, screen);
+ dev->vscreen = vl_dri2_screen_create(display, screen);
if (!dev->vscreen) {
ret = VDP_STATUS_RESOURCES;
goto no_vscreen;
@@ -136,7 +136,7 @@ no_handle:
no_resource:
dev->context->destroy(dev->context);
no_context:
- vl_screen_destroy(dev->vscreen);
+ dev->vscreen->destroy(dev->vscreen);
no_vscreen:
FREE(dev);
no_dev:
@@ -227,7 +227,7 @@ vlVdpDeviceFree(vlVdpDevice *dev)
vl_compositor_cleanup(&dev->compositor);
pipe_sampler_view_reference(&dev->dummy_sv, NULL);
dev->context->destroy(dev->context);
- vl_screen_destroy(dev->vscreen);
+ dev->vscreen->destroy(dev->vscreen);
FREE(dev);
vlDestroyHTAB();
}
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index e53303708b2..e7f387e6173 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -186,7 +186,8 @@ vlVdpPresentationQueueGetTime(VdpPresentationQueue presentation_queue,
return VDP_STATUS_INVALID_HANDLE;
pipe_mutex_lock(pq->device->mutex);
- *current_time = vl_screen_get_timestamp(pq->device->vscreen, pq->drawable);
+ *current_time = pq->device->vscreen->get_timestamp(pq->device->vscreen,
+ (void *)pq->drawable);
pipe_mutex_unlock(pq->device->mutex);
return VDP_STATUS_OK;
@@ -214,6 +215,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
struct vl_compositor *compositor;
struct vl_compositor_state *cstate;
+ struct vl_screen *vscreen;
pq = vlGetDataHTAB(presentation_queue);
if (!pq)
@@ -226,15 +228,16 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
pipe = pq->device->context;
compositor = &pq->device->compositor;
cstate = &pq->cstate;
+ vscreen = pq->device->vscreen;
pipe_mutex_lock(pq->device->mutex);
- tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable);
+ tex = vscreen->texture_from_drawable(vscreen, (void *)pq->drawable);
if (!tex) {
pipe_mutex_unlock(pq->device->mutex);
return VDP_STATUS_INVALID_HANDLE;
}
- dirty_area = vl_screen_get_dirty_area(pq->device->vscreen);
+ dirty_area = vscreen->get_dirty_area(vscreen);
memset(&surf_templ, 0, sizeof(surf_templ));
surf_templ.format = tex->format;
@@ -267,12 +270,9 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true);
}
- vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time);
- pipe->screen->flush_frontbuffer
- (
- pipe->screen, tex, 0, 0,
- vl_screen_get_private(pq->device->vscreen), NULL
- );
+ vscreen->set_next_timestamp(vscreen, earliest_presentation_time);
+ pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
+ vscreen->get_private(vscreen), NULL);
pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL);
pipe->flush(pipe, &surf->fence, 0);
diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
index 5051e8246e3..968778f995c 100644
--- a/src/gallium/state_trackers/xa/Makefile.am
+++ b/src/gallium/state_trackers/xa/Makefile.am
@@ -28,15 +28,6 @@ AM_CFLAGS = \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS)
-AM_CPPFLAGS = \
- $(GALLIUM_PIPE_LOADER_DEFINES) \
- -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
-
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS += \
- -DGALLIUM_STATIC_TARGETS=1
-endif
-
xa_includedir = $(includedir)
xa_include_HEADERS = \
xa_composite.h \
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index 4fdbdc96ae6..faa630c144b 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -152,21 +152,13 @@ xa_tracker_create(int drm_fd)
struct xa_tracker *xa = calloc(1, sizeof(struct xa_tracker));
enum xa_surface_type stype;
unsigned int num_formats;
- int loader_fd;
if (!xa)
return NULL;
-#if GALLIUM_STATIC_TARGETS
- xa->screen = dd_create_screen(drm_fd);
- (void) loader_fd; /* silence unused var warning */
-#else
- loader_fd = dup(drm_fd);
- if (loader_fd == -1)
- return NULL;
- if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd))
- xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR);
-#endif
+ if (pipe_loader_drm_probe_fd(&xa->dev, dup(drm_fd)))
+ xa->screen = pipe_loader_create_screen(xa->dev);
+
if (!xa->screen)
goto out_no_screen;
@@ -214,10 +206,8 @@ xa_tracker_create(int drm_fd)
out_no_pipe:
xa->screen->destroy(xa->screen);
out_no_screen:
-#if !GALLIUM_STATIC_TARGETS
if (xa->dev)
pipe_loader_release(&xa->dev, 1);
-#endif
free(xa);
return NULL;
}
@@ -228,9 +218,7 @@ xa_tracker_destroy(struct xa_tracker *xa)
free(xa->supported_formats);
xa_context_destroy(xa->default_ctx);
xa->screen->destroy(xa->screen);
-#if !GALLIUM_STATIC_TARGETS
pipe_loader_release(&xa->dev, 1);
-#endif
free(xa);
}
diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c
index 4702b44d1f4..a6991ab8d61 100644
--- a/src/gallium/state_trackers/xvmc/context.c
+++ b/src/gallium/state_trackers/xvmc/context.c
@@ -229,7 +229,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
return BadAlloc;
/* TODO: Reuse screen if process creates another context */
- vscreen = vl_screen_create(dpy, scrn);
+ vscreen = vl_dri2_screen_create(dpy, scrn);
if (!vscreen) {
XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL screen.\n");
@@ -240,7 +240,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen, 0);
if (!pipe) {
XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n");
- vl_screen_destroy(vscreen);
+ vscreen->destroy(vscreen);
FREE(context_priv);
return BadAlloc;
}
@@ -258,7 +258,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
if (!context_priv->decoder) {
XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n");
pipe->destroy(pipe);
- vl_screen_destroy(vscreen);
+ vscreen->destroy(vscreen);
FREE(context_priv);
return BadAlloc;
}
@@ -267,7 +267,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n");
context_priv->decoder->destroy(context_priv->decoder);
pipe->destroy(pipe);
- vl_screen_destroy(vscreen);
+ vscreen->destroy(vscreen);
FREE(context_priv);
return BadAlloc;
}
@@ -277,7 +277,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
vl_compositor_cleanup(&context_priv->compositor);
context_priv->decoder->destroy(context_priv->decoder);
pipe->destroy(pipe);
- vl_screen_destroy(vscreen);
+ vscreen->destroy(vscreen);
FREE(context_priv);
return BadAlloc;
}
@@ -332,7 +332,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
vl_compositor_cleanup_state(&context_priv->cstate);
vl_compositor_cleanup(&context_priv->compositor);
context_priv->pipe->destroy(context_priv->pipe);
- vl_screen_destroy(context_priv->vscreen);
+ context_priv->vscreen->destroy(context_priv->vscreen);
FREE(context_priv);
context->privData = NULL;
diff --git a/src/gallium/state_trackers/xvmc/surface.c b/src/gallium/state_trackers/xvmc/surface.c
index 15eae59ff6e..199712ba168 100644
--- a/src/gallium/state_trackers/xvmc/surface.c
+++ b/src/gallium/state_trackers/xvmc/surface.c
@@ -355,6 +355,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
struct pipe_context *pipe;
struct vl_compositor *compositor;
struct vl_compositor_state *cstate;
+ struct vl_screen *vscreen;
XvMCSurfacePrivate *surface_priv;
XvMCContextPrivate *context_priv;
@@ -386,9 +387,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
pipe = context_priv->pipe;
compositor = &context_priv->compositor;
cstate = &context_priv->cstate;
+ vscreen = context_priv->vscreen;
- tex = vl_screen_texture_from_drawable(context_priv->vscreen, drawable);
- dirty_area = vl_screen_get_dirty_area(context_priv->vscreen);
+ tex = vscreen->texture_from_drawable(vscreen, (void *)drawable);
+ dirty_area = vscreen->get_dirty_area(vscreen);
memset(&surf_templ, 0, sizeof(surf_templ));
surf_templ.format = tex->format;
@@ -444,11 +446,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
- pipe->screen->flush_frontbuffer
- (
- pipe->screen, tex, 0, 0,
- vl_screen_get_private(context_priv->vscreen), NULL
- );
+ pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
+ vscreen->get_private(vscreen), NULL);
if(dump_window == -1) {
dump_window = debug_get_num_option("XVMC_DUMP", 0);
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index b5221472ef0..d1d9829b6c5 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -34,19 +34,6 @@ AM_CFLAGS = \
$(GALLIUM_TARGET_CFLAGS) \
$(VISIBILITY_CFLAGS)
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS = \
- -DNINE_TARGET \
- -DGALLIUM_STATIC_TARGETS=1
-
-else
-
-AM_CPPFLAGS = \
- -DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
- $(GALLIUM_PIPE_LOADER_DEFINES)
-
-endif
-
ninedir = $(D3D_DRIVER_INSTALL_DIR)
nine_LTLIBRARIES = d3dadapter9.la
@@ -78,7 +65,6 @@ d3dadapter9_la_LIBADD = \
$(top_builddir)/src/glsl/libnir.la \
$(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \
$(top_builddir)/src/util/libmesautil.la \
- $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
$(EXPAT_LIBS) \
$(GALLIUM_COMMON_LIB_DEPS)
@@ -87,7 +73,7 @@ EXTRA_DIST = d3dadapter9.sym
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
@@ -111,14 +97,16 @@ include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc
if HAVE_GALLIUM_STATIC_TARGETS
d3dadapter9_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS)
-d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \
+d3dadapter9_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
d3dadapter9_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index fabc820f268..ad712db05eb 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -20,6 +20,7 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/* XXX: header order is slightly screwy here */
#include "loader.h"
#include "adapter9.h"
@@ -29,8 +30,7 @@
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
-#include "target-helpers/inline_drm_helper.h"
-#include "target-helpers/inline_sw_helper.h"
+#include "target-helpers/drm_helper.h"
#include "state_tracker/drm_driver.h"
#include "d3dadapter/d3dadapter9.h"
@@ -91,53 +91,15 @@ drm_destroy( struct d3dadapter9_context *ctx )
else if (ctx->hal)
ctx->hal->destroy(ctx->hal);
-#if !GALLIUM_STATIC_TARGETS
if (drm->swdev)
pipe_loader_release(&drm->swdev, 1);
if (drm->dev)
pipe_loader_release(&drm->dev, 1);
-#endif
close(drm->fd);
FREE(ctx);
}
-/* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is
- * formatted. */
-static inline DWORD
-read_file_dword( const char *name )
-{
- char buf[32];
- int fd, r;
-
- fd = open(name, O_RDONLY);
- if (fd < 0) {
- DBG("Unable to get PCI information from `%s'\n", name);
- return 0;
- }
-
- r = read(fd, buf, 32);
- close(fd);
-
- return (r > 0) ? (DWORD)strtol(buf, NULL, 0) : 0;
-}
-
-/* sysfs doesn't expose the revision as its own file, so this function grabs a
- * dword at an offset in the raw PCI header. The reason this isn't used for all
- * data is that the kernel will make corrections but not expose them in the raw
- * header bytes. */
-static inline DWORD
-read_config_dword( int fd,
- unsigned offset )
-{
- DWORD r = 0;
-
- if (lseek(fd, offset, SEEK_SET) != offset) { return 0; }
- if (read(fd, &r, 4) != 4) { return 0; }
-
- return r;
-}
-
static inline void
get_bus_info( int fd,
DWORD *vendorid,
@@ -215,26 +177,16 @@ drm_create_adapter( int fd,
driOptionCache userInitOptions;
int throttling_value_user = -2;
-#if !GALLIUM_STATIC_TARGETS
- const char *paths[] = {
- getenv("D3D9_DRIVERS_PATH"),
- getenv("D3D9_DRIVERS_DIR"),
- PIPE_SEARCH_DIR
- };
-#endif
-
if (!ctx) { return E_OUTOFMEMORY; }
ctx->base.destroy = drm_destroy;
+ /* Although the fd is provided from external source, mesa/nine
+ * takes ownership of it. */
fd = loader_get_user_preferred_fd(fd, &different_device);
ctx->fd = fd;
ctx->base.linear_framebuffer = !!different_device;
-#if GALLIUM_STATIC_TARGETS
- ctx->base.hal = dd_create_screen(fd);
-#else
- /* use pipe-loader to dlopen appropriate drm driver */
if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) {
ERR("Failed to probe drm fd %d.\n", fd);
FREE(ctx);
@@ -242,26 +194,15 @@ drm_create_adapter( int fd,
return D3DERR_DRIVERINTERNALERROR;
}
- /* use pipe-loader to create a drm screen (hal) */
- ctx->base.hal = NULL;
- for (i = 0; !ctx->base.hal && i < Elements(paths); ++i) {
- if (!paths[i]) { continue; }
- ctx->base.hal = pipe_loader_create_screen(ctx->dev, paths[i]);
- }
-#endif
+ ctx->base.hal = pipe_loader_create_screen(ctx->dev);
if (!ctx->base.hal) {
ERR("Unable to load requested driver.\n");
drm_destroy(&ctx->base);
return D3DERR_DRIVERINTERNALERROR;
}
-#if GALLIUM_STATIC_TARGETS
- dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
- throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
-#else
dmabuf_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_SHARE_FD);
throttle_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_THROTTLE);
-#endif // GALLIUM_STATIC_TARGETS
if (!dmabuf_ret || !dmabuf_ret->val.val_bool) {
ERR("The driver is not capable of dma-buf sharing."
"Abandon to load nine state tracker\n");
@@ -308,18 +249,10 @@ drm_create_adapter( int fd,
driDestroyOptionCache(&userInitOptions);
driDestroyOptionInfo(&defaultInitOptions);
-#if GALLIUM_STATIC_TARGETS
- ctx->base.ref = ninesw_create_screen(ctx->base.hal);
-#else
/* wrap it to create a software screen that can share resources */
- if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) {
- ctx->base.ref = NULL;
- for (i = 0; !ctx->base.ref && i < Elements(paths); ++i) {
- if (!paths[i]) { continue; }
- ctx->base.ref = pipe_loader_create_screen(ctx->swdev, paths[i]);
- }
- }
-#endif
+ if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal))
+ ctx->base.ref = pipe_loader_create_screen(ctx->swdev);
+
if (!ctx->base.ref) {
ERR("Couldn't wrap drm screen to swrast screen. Software devices "
"will be unavailable.\n");
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index a33d7f83671..2d9610ee9ab 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -35,7 +35,7 @@ endif
LOCAL_SRC_FILES := target.c
-LOCAL_CFLAGS := -DDRI_TARGET
+LOCAL_CFLAGS :=
LOCAL_SHARED_LIBRARIES := \
libdl \
@@ -108,6 +108,7 @@ LOCAL_STATIC_LIBRARIES := \
libmesa_dri_common \
libmesa_megadriver_stub \
libmesa_gallium \
+ libmesa_pipe_loader \
libmesa_util \
libmesa_loader \
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 95efdd4451c..2666524fbfe 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -10,7 +10,6 @@ AM_CFLAGS = \
AM_CPPFLAGS = \
$(DEFINES) \
- -DDRI_TARGET \
-DGALLIUM_DDEBUG \
-DGALLIUM_NOOP \
-DGALLIUM_RBUG \
@@ -65,7 +64,7 @@ EXTRA_DIST = \
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
@@ -92,14 +91,16 @@ if HAVE_GALLIUM_STATIC_TARGETS
gallium_dri_la_SOURCES += target.c
gallium_dri_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS)
-gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \
+gallium_dri_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
gallium_dri_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript
index 2fb0da09200..b4516598675 100644
--- a/src/gallium/targets/dri/SConscript
+++ b/src/gallium/targets/dri/SConscript
@@ -30,7 +30,6 @@ env.PkgUseModules('DRM')
env.Append(CPPDEFINES = [
'GALLIUM_VMWGFX',
'GALLIUM_SOFTPIPE',
- 'DRI_TARGET',
])
env.Prepend(LIBS = [
@@ -39,6 +38,7 @@ env.Prepend(LIBS = [
svga,
ws_dri,
softpipe,
+ pipe_loader,
libloader,
mesautil,
mesa,
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index 32a11ef6281..d6fbd01b88f 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -1,2 +1,163 @@
-#include "target-helpers/inline_drm_helper.h"
-#include "target-helpers/inline_sw_helper.h"
+#include "target-helpers/drm_helper.h"
+
+#include "dri_screen.h"
+
+#if defined(GALLIUM_SOFTPIPE)
+
+const __DRIextension **__driDriverGetExtensions_swrast(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
+{
+ globalDriverAPI = &galliumsw_driver_api;
+ return galliumsw_driver_extensions;
+}
+
+#if defined(HAVE_LIBDRM)
+
+const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
+{
+ globalDriverAPI = &dri_kms_driver_api;
+ return galliumdrm_driver_extensions;
+}
+
+#endif
+#endif
+
+#if defined(GALLIUM_I915)
+
+const __DRIextension **__driDriverGetExtensions_i915(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_ILO)
+
+const __DRIextension **__driDriverGetExtensions_i965(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_NOUVEAU)
+
+const __DRIextension **__driDriverGetExtensions_nouveau(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_R300)
+
+const __DRIextension **__driDriverGetExtensions_r300(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_R600)
+
+const __DRIextension **__driDriverGetExtensions_r600(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_RADEONSI)
+
+const __DRIextension **__driDriverGetExtensions_radeonsi(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VMWGFX)
+
+const __DRIextension **__driDriverGetExtensions_vmwgfx(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_FREEDRENO)
+
+const __DRIextension **__driDriverGetExtensions_msm(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+
+const __DRIextension **__driDriverGetExtensions_kgsl(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VIRGL)
+
+const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VC4)
+
+const __DRIextension **__driDriverGetExtensions_vc4(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+
+#if defined(USE_VC4_SIMULATOR)
+const __DRIextension **__driDriverGetExtensions_i965(void);
+
+/**
+ * When building using the simulator (on x86), we advertise ourselves as the
+ * i965 driver so that you can just make a directory with a link from
+ * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
+ * on your i965-using host to run the driver under simulation.
+ *
+ * This is, of course, incompatible with building with the ilo driver, but you
+ * shouldn't be building that anyway.
+ */
+PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
+{
+ globalDriverAPI = &galliumdrm_driver_api;
+ return galliumdrm_driver_extensions;
+}
+#endif
+#endif
diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am
index a4dff487dd8..3bdb9eb7e61 100644
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -40,7 +40,7 @@ if HAVE_GALLIUM_STATIC_TARGETS
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
@@ -50,14 +50,16 @@ include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
libomx_mesa_la_SOURCES += target.c
libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \
+libomx_mesa_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
libomx_mesa_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/omx/target.c b/src/gallium/targets/omx/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/omx/target.c
+++ b/src/gallium/targets/omx/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index c78b26832ff..3cb29766724 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -15,11 +15,10 @@ lib@OPENCL_LIBNAME@_la_LDFLAGS += \
endif
lib@OPENCL_LIBNAME@_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \
$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
$(ELF_LIB) \
-ldl \
-lclangCodeGen \
diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am
index 4f25b4f6073..4bc3b55f26b 100644
--- a/src/gallium/targets/pipe-loader/Makefile.am
+++ b/src/gallium/targets/pipe-loader/Makefile.am
@@ -27,6 +27,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ $(GALLIUM_PIPE_LOADER_DEFINES) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
-DGALLIUM_RBUG \
@@ -208,6 +209,10 @@ AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
pipe_swrast_la_LIBADD += \
$(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la
endif
+
+pipe_swrast_la_LIBADD += \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+
endif
EXTRA_DIST = pipe.sym
diff --git a/src/gallium/targets/pipe-loader/pipe.sym b/src/gallium/targets/pipe-loader/pipe.sym
index 19b1d77b040..b2fa619f7de 100644
--- a/src/gallium/targets/pipe-loader/pipe.sym
+++ b/src/gallium/targets/pipe-loader/pipe.sym
@@ -1,7 +1,7 @@
{
global:
driver_descriptor;
- swrast_create_screen;
+ swrast_driver_descriptor;
local:
*;
};
diff --git a/src/gallium/targets/pipe-loader/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c
index f7f354acf3f..cf617f37e20 100644
--- a/src/gallium/targets/pipe-loader/pipe_swrast.c
+++ b/src/gallium/targets/pipe-loader/pipe_swrast.c
@@ -1,7 +1,11 @@
#include "target-helpers/inline_sw_helper.h"
#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
+#include "state_tracker/sw_driver.h"
+#include "sw/dri/dri_sw_winsys.h"
+#include "sw/kms-dri/kms_dri_sw_winsys.h"
+#include "sw/null/null_sw_winsys.h"
+#include "sw/wrapper/wrapper_sw_winsys.h"
PUBLIC struct pipe_screen *
swrast_create_screen(struct sw_winsys *ws);
@@ -17,3 +21,31 @@ swrast_create_screen(struct sw_winsys *ws)
return screen;
}
+
+PUBLIC
+struct sw_driver_descriptor swrast_driver_descriptor = {
+ .create_screen = swrast_create_screen,
+ .winsys = {
+#ifdef HAVE_PIPE_LOADER_DRI
+ {
+ .name = "dri",
+ .create_winsys = dri_create_sw_winsys,
+ },
+#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+ {
+ .name = "kms_dri",
+ .create_winsys = kms_dri_create_winsys,
+ },
+#endif
+ {
+ .name = "null",
+ .create_winsys = null_sw_create,
+ },
+ {
+ .name = "wrapped",
+ .create_winsys = wrapper_sw_winsys_wrap_pipe_screen,
+ },
+ { 0 },
+ }
+};
diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am
index 9613f041b58..733e7acb455 100644
--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -40,21 +40,23 @@ if HAVE_GALLIUM_STATIC_TARGETS
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc
include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
gallium_drv_video_la_SOURCES += target.c
gallium_drv_video_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \
+gallium_drv_video_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
gallium_drv_video_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/va/target.c b/src/gallium/targets/va/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/va/target.c
+++ b/src/gallium/targets/va/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index 7eb62c1cc78..d388f8b5014 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -47,7 +47,7 @@ EXTRA_DIST = \
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
@@ -59,14 +59,16 @@ if HAVE_GALLIUM_STATIC_TARGETS
libvdpau_gallium_la_SOURCES += target.c
libvdpau_gallium_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \
+libvdpau_gallium_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
libvdpau_gallium_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/vdpau/target.c b/src/gallium/targets/vdpau/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/vdpau/target.c
+++ b/src/gallium/targets/vdpau/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 02c42c665ed..a63fd6903a4 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -60,7 +60,7 @@ if HAVE_GALLIUM_STATIC_TARGETS
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
@@ -74,13 +74,15 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc
libxatracker_la_SOURCES += target.c
libxatracker_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libxatracker_la_LIBADD += $(TARGET_LIB_DEPS)
+libxatracker_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS)
else # HAVE_GALLIUM_STATIC_TARGETS
libxatracker_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/xa/target.c b/src/gallium/targets/xa/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/xa/target.c
+++ b/src/gallium/targets/xa/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am
index b3285890822..fdc5f4b7318 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -38,7 +38,7 @@ EXTRA_DIST = xvmc.sym
TARGET_DRIVERS =
TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
@@ -48,13 +48,15 @@ if HAVE_GALLIUM_STATIC_TARGETS
libXvMCgallium_la_SOURCES += target.c
libXvMCgallium_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \
+libXvMCgallium_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
libXvMCgallium_la_LIBADD += \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/xvmc/target.c b/src/gallium/targets/xvmc/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/xvmc/target.c
+++ b/src/gallium/targets/xvmc/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index 56b7f3ffc66..585fb699e6c 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -5,17 +5,10 @@ PIPE_SRC_DIR = $(top_builddir)/src/gallium/targets/pipe-loader
AM_CFLAGS = \
$(GALLIUM_CFLAGS)
-AM_CPPFLAGS = \
- -I$(top_srcdir)/src/gallium/drivers \
- -I$(top_srcdir)/src/gallium/winsys \
- -DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\" \
- $(GALLIUM_PIPE_LOADER_DEFINES)
-
LDADD = \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
$(GALLIUM_COMMON_LIB_DEPS)
noinst_PROGRAMS = compute tri quad-tex
diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c
index b344f78b25c..bcdfb11c4f1 100644
--- a/src/gallium/tests/trivial/compute.c
+++ b/src/gallium/tests/trivial/compute.c
@@ -74,7 +74,7 @@ static void init_ctx(struct context *ctx)
ret = pipe_loader_probe(&ctx->dev, 1);
assert(ret);
- ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
+ ctx->screen = pipe_loader_create_screen(ctx->dev);
assert(ctx->screen);
ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index f66f63043da..4c5a9200a52 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -96,7 +96,7 @@ static void init_prog(struct program *p)
assert(ret);
/* init a pipe screen */
- p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+ p->screen = pipe_loader_create_screen(p->dev);
assert(p->screen);
/* create the pipe driver context and cso context */
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c
index a555200842e..c71a63f44e5 100644
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -91,7 +91,7 @@ static void init_prog(struct program *p)
assert(ret);
/* init a pipe screen */
- p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+ p->screen = pipe_loader_create_screen(p->dev);
assert(p->screen);
/* create the pipe driver context and cso context */
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 6898fb0d492..59cc8577a6e 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \
$(MESA_TOP)/src/glsl/nir
LOCAL_EXPORT_C_INCLUDE_DIRS += \
- $(intermediates)/nir
+ $(intermediates)/nir \
+ $(MESA_TOP)/src/glsl/nir
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
$(LIBGLCPP_GENERATED_FILES) \
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 957fd6b90ba..0c9fd75d206 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -22,10 +22,12 @@ NIR_FILES = \
nir/glsl_to_nir.h \
nir/glsl_types.cpp \
nir/glsl_types.h \
+ nir/builtin_type_macros.h \
nir/nir.c \
nir/nir.h \
nir/nir_array.h \
nir/nir_builder.h \
+ nir/nir_clone.c \
nir/nir_constant_expressions.h \
nir/nir_control_flow.c \
nir/nir_control_flow.h \
@@ -102,7 +104,6 @@ LIBGLSL_FILES = \
blob.c \
blob.h \
builtin_functions.cpp \
- builtin_type_macros.h \
builtin_types.cpp \
builtin_variables.cpp \
glsl_parser_extras.cpp \
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 1b75234d578..3bea63ea0ed 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -336,7 +336,7 @@ public:
array_dimensions.push_tail(&dim->link);
}
- const bool is_single_dimension()
+ bool is_single_dimension() const
{
return this->array_dimensions.tail_pred->prev != NULL &&
this->array_dimensions.tail_pred->prev->is_head_sentinel();
@@ -350,6 +350,26 @@ public:
exec_list array_dimensions;
};
+class ast_layout_expression : public ast_node {
+public:
+ ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr)
+ {
+ set_location(locp);
+ layout_const_expressions.push_tail(&expr->link);
+ }
+
+ bool process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+ const char *qual_indentifier,
+ unsigned *value, bool can_be_zero);
+
+ void merge_qualifier(ast_layout_expression *l_expr)
+ {
+ layout_const_expressions.append_list(&l_expr->layout_const_expressions);
+ }
+
+ exec_list layout_const_expressions;
+};
+
/**
* C-style aggregate initialization class
*
@@ -558,7 +578,7 @@ struct ast_type_qualifier {
unsigned precision:2;
/** Geometry shader invocations for GL_ARB_gpu_shader5. */
- int invocations;
+ ast_layout_expression *invocations;
/**
* Location specified via GL_ARB_explicit_attrib_location layout
@@ -566,20 +586,20 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_location is set.
*/
- int location;
+ ast_expression *location;
/**
* Index specified via GL_ARB_explicit_attrib_location layout
*
* \note
* This field is only valid if \c explicit_index is set.
*/
- int index;
+ ast_expression *index;
/** Maximum output vertices in GLSL 1.50 geometry shaders. */
- int max_vertices;
+ ast_layout_expression *max_vertices;
/** Stream in GLSL 1.50 geometry shaders. */
- unsigned stream;
+ ast_expression *stream;
/**
* Input or output primitive type in GLSL 1.50 geometry shaders
@@ -593,7 +613,7 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_binding is set.
*/
- int binding;
+ ast_expression *binding;
/**
* Offset specified via GL_ARB_shader_atomic_counter's "offset"
@@ -602,14 +622,14 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_offset is set.
*/
- int offset;
+ ast_expression *offset;
/**
* Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}"
* layout qualifier. Element i of this array is only valid if
* flags.q.local_size & (1 << i) is set.
*/
- int local_size[3];
+ ast_layout_expression *local_size[3];
/** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */
GLenum vertex_spacing;
@@ -621,7 +641,7 @@ struct ast_type_qualifier {
bool point_mode;
/** Tessellation control shader: number of output vertices */
- int vertices;
+ ast_layout_expression *vertices;
/**
* Image format specified with an ARB_shader_image_load_store
@@ -752,7 +772,7 @@ public:
class ast_fully_specified_type : public ast_node {
public:
virtual void print(void) const;
- bool has_qualifiers() const;
+ bool has_qualifiers(_mesa_glsl_parse_state *state) const;
ast_fully_specified_type() : qualifier(), specifier(NULL)
{
@@ -1093,17 +1113,13 @@ public:
class ast_tcs_output_layout : public ast_node
{
public:
- ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices)
- : vertices(vertices)
+ ast_tcs_output_layout(const struct YYLTYPE &locp)
{
set_location(locp);
}
virtual ir_rvalue *hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
-
-private:
- const int vertices;
};
@@ -1135,9 +1151,12 @@ private:
class ast_cs_input_layout : public ast_node
{
public:
- ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size)
+ ast_cs_input_layout(const struct YYLTYPE &locp,
+ ast_layout_expression **local_size)
{
- memcpy(this->local_size, local_size, sizeof(this->local_size));
+ for (int i = 0; i < 3; i++) {
+ this->local_size[i] = local_size[i];
+ }
set_location(locp);
}
@@ -1145,7 +1164,7 @@ public:
struct _mesa_glsl_parse_state *state);
private:
- unsigned local_size[3];
+ ast_layout_expression *local_size[3];
};
/*@}*/
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 65db2618895..52881a4da7a 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2491,7 +2491,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
"uniform block layout qualifiers row_major and "
"column_major may not be applied to variables "
"outside of uniform blocks");
- } else if (!type->is_matrix()) {
+ } else if (!type->without_array()->is_matrix()) {
/* The OpenGL ES 3.0 conformance tests did not originally allow
* matrix layout qualifiers on non-matrices. However, the OpenGL
* 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
@@ -2502,39 +2502,88 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
"uniform block layout qualifiers row_major and "
"column_major applied to non-matrix types may "
"be rejected by older compilers");
- } else if (type->is_record()) {
- /* We allow 'layout(row_major)' on structure types because it's the only
- * way to get row-major layouts on matrices contained in structures.
- */
- _mesa_glsl_warning(loc, state,
- "uniform block layout qualifiers row_major and "
- "column_major applied to structure types is not "
- "strictly conformant and may be rejected by other "
- "compilers");
}
}
static bool
-validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
+process_qualifier_constant(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
- const glsl_type *type,
- const ast_type_qualifier *qual)
+ const char *qual_indentifier,
+ ast_expression *const_expression,
+ unsigned *value)
+{
+ exec_list dummy_instructions;
+
+ if (const_expression == NULL) {
+ *value = 0;
+ return true;
+ }
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ _mesa_glsl_error(loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < 0) {
+ _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+ qual_indentifier, const_int->value.u[0]);
+ return false;
+ }
+
+ /* If the location is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the location isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ *value = const_int->value.u[0];
+ return true;
+}
+
+static bool
+validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
+ unsigned stream)
+{
+ if (stream >= state->ctx->Const.MaxVertexStreams) {
+ _mesa_glsl_error(loc, state,
+ "invalid stream specified %d is larger than "
+ "MAX_VERTEX_STREAMS - 1 (%d).",
+ stream, state->ctx->Const.MaxVertexStreams - 1);
+ return false;
+ }
+
+ return true;
+}
+
+static void
+apply_explicit_binding(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ ir_variable *var,
+ const glsl_type *type,
+ const ast_type_qualifier *qual)
{
if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(loc, state,
"the \"binding\" qualifier only applies to uniforms and "
"shader storage buffer objects");
- return false;
+ return;
}
- if (qual->binding < 0) {
- _mesa_glsl_error(loc, state, "binding values must be >= 0");
- return false;
+ unsigned qual_binding;
+ if (!process_qualifier_constant(state, loc, "binding", qual->binding,
+ &qual_binding)) {
+ return;
}
const struct gl_context *const ctx = state->ctx;
unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
- unsigned max_index = qual->binding + elements - 1;
+ unsigned max_index = qual_binding + elements - 1;
const glsl_type *base_type = type->without_array();
if (base_type->is_interface()) {
@@ -2550,11 +2599,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
*/
if (qual->flags.q.uniform &&
max_index >= ctx->Const.MaxUniformBufferBindings) {
- _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds "
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds "
"the maximum number of UBO binding points (%d)",
- qual->binding, elements,
+ qual_binding, elements,
ctx->Const.MaxUniformBufferBindings);
- return false;
+ return;
}
/* SSBOs. From page 67 of the GLSL 4.30 specification:
@@ -2568,11 +2617,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
*/
if (qual->flags.q.buffer &&
max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
- _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds "
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds "
"the maximum number of SSBO binding points (%d)",
- qual->binding, elements,
+ qual_binding, elements,
ctx->Const.MaxShaderStorageBufferBindings);
- return false;
+ return;
}
} else if (base_type->is_sampler()) {
/* Samplers. From page 63 of the GLSL 4.20 specification:
@@ -2587,19 +2636,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
if (max_index >= limit) {
_mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers "
"exceeds the maximum number of texture image units "
- "(%d)", qual->binding, elements, limit);
+ "(%u)", qual_binding, elements, limit);
- return false;
+ return;
}
} else if (base_type->contains_atomic()) {
assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
- if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) {
+ if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) {
_mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
" maximum number of atomic counter buffer bindings"
- "(%d)", qual->binding,
+ "(%u)", qual_binding,
ctx->Const.MaxAtomicBufferBindings);
- return false;
+ return;
}
} else if (state->is_version(420, 310) && base_type->is_image()) {
assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
@@ -2607,17 +2656,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
_mesa_glsl_error(loc, state, "Image binding %d exceeds the "
" maximum number of image units (%d)", max_index,
ctx->Const.MaxImageUnits);
- return false;
+ return;
}
} else {
_mesa_glsl_error(loc, state,
"the \"binding\" qualifier only applies to uniform "
"blocks, opaque variables, or arrays thereof");
- return false;
+ return;
}
- return true;
+ var->data.explicit_binding = true;
+ var->data.binding = qual_binding;
+
+ return;
}
@@ -2660,20 +2712,26 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
static void
-validate_explicit_location(const struct ast_type_qualifier *qual,
- ir_variable *var,
- struct _mesa_glsl_parse_state *state,
- YYLTYPE *loc)
+apply_explicit_location(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
{
bool fail = false;
+ unsigned qual_location;
+ if (!process_qualifier_constant(state, loc, "location", qual->location,
+ &qual_location)) {
+ return;
+ }
+
/* Checks for GL_ARB_explicit_uniform_location. */
if (qual->flags.q.uniform) {
if (!state->check_explicit_uniform_location_allowed(loc, var))
return;
const struct gl_context *const ctx = state->ctx;
- unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+ unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
_mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
@@ -2683,7 +2741,7 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
}
var->data.explicit_location = true;
- var->data.location = qual->location;
+ var->data.location = qual_location;
return;
}
@@ -2768,30 +2826,40 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
switch (state->stage) {
case MESA_SHADER_VERTEX:
var->data.location = (var->data.mode == ir_var_shader_in)
- ? (qual->location + VERT_ATTRIB_GENERIC0)
- : (qual->location + VARYING_SLOT_VAR0);
+ ? (qual_location + VERT_ATTRIB_GENERIC0)
+ : (qual_location + VARYING_SLOT_VAR0);
break;
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if (var->data.patch)
- var->data.location = qual->location + VARYING_SLOT_PATCH0;
+ var->data.location = qual_location + VARYING_SLOT_PATCH0;
else
- var->data.location = qual->location + VARYING_SLOT_VAR0;
+ var->data.location = qual_location + VARYING_SLOT_VAR0;
break;
case MESA_SHADER_FRAGMENT:
var->data.location = (var->data.mode == ir_var_shader_out)
- ? (qual->location + FRAG_RESULT_DATA0)
- : (qual->location + VARYING_SLOT_VAR0);
+ ? (qual_location + FRAG_RESULT_DATA0)
+ : (qual_location + VARYING_SLOT_VAR0);
break;
case MESA_SHADER_COMPUTE:
assert(!"Unexpected shader type");
break;
}
- if (qual->flags.q.explicit_index) {
+ /* Check if index was set for the uniform instead of the function */
+ if (qual->flags.q.explicit_index && qual->flags.q.subroutine) {
+ _mesa_glsl_error(loc, state, "an index qualifier can only be "
+ "used with subroutine functions");
+ return;
+ }
+
+ unsigned qual_index;
+ if (qual->flags.q.explicit_index &&
+ process_qualifier_constant(state, loc, "index", qual->index,
+ &qual_index)) {
/* From the GLSL 4.30 specification, section 4.4.2 (Output
* Layout Qualifiers):
*
@@ -2801,12 +2869,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
* Older specifications don't mandate a behavior; we take
* this as a clarification and always generate the error.
*/
- if (qual->index < 0 || qual->index > 1) {
+ if (qual_index > 1) {
_mesa_glsl_error(loc, state,
"explicit index may only be 0 or 1");
} else {
var->data.explicit_index = true;
- var->data.index = qual->index;
+ var->data.index = qual_index;
}
}
}
@@ -2939,6 +3007,221 @@ validate_array_dimensions(const glsl_type *t,
}
static void
+apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+{
+ if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+ /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
+ *
+ * "Within any shader, the first redeclarations of gl_FragCoord
+ * must appear before any use of gl_FragCoord."
+ *
+ * Generate a compiler error if above condition is not met by the
+ * fragment shader.
+ */
+ ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+ if (earlier != NULL &&
+ earlier->data.used &&
+ !state->fs_redeclares_gl_fragcoord) {
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord used before its first redeclaration "
+ "in fragment shader");
+ }
+
+ /* Make sure all gl_FragCoord redeclarations specify the same layout
+ * qualifiers.
+ */
+ if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+ const char *const qual_string =
+ get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+ qual->flags.q.pixel_center_integer);
+
+ const char *const state_string =
+ get_layout_qualifier_string(state->fs_origin_upper_left,
+ state->fs_pixel_center_integer);
+
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord redeclared with different layout "
+ "qualifiers (%s) and (%s) ",
+ state_string,
+ qual_string);
+ }
+ state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+ state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+ !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord =
+ state->fs_origin_upper_left ||
+ state->fs_pixel_center_integer ||
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+ }
+
+ var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+ var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+ if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+ && (strcmp(var->name, "gl_FragCoord") != 0)) {
+ const char *const qual_string = (qual->flags.q.origin_upper_left)
+ ? "origin_upper_left" : "pixel_center_integer";
+
+ _mesa_glsl_error(loc, state,
+ "layout qualifier `%s' can only be applied to "
+ "fragment shader input `gl_FragCoord'",
+ qual_string);
+ }
+
+ if (qual->flags.q.explicit_location) {
+ apply_explicit_location(qual, var, state, loc);
+ } else if (qual->flags.q.explicit_index) {
+ if (!qual->flags.q.subroutine_def)
+ _mesa_glsl_error(loc, state,
+ "explicit index requires explicit location");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ apply_explicit_binding(state, loc, var, var->type, qual);
+ }
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ qual->flags.q.out && qual->flags.q.stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, loc, "stream", qual->stream,
+ &qual_stream) &&
+ validate_stream_qualifier(loc, state, qual_stream)) {
+ var->data.stream = qual_stream;
+ }
+ }
+
+ if (var->type->contains_atomic()) {
+ if (var->data.mode == ir_var_uniform) {
+ if (var->data.explicit_binding) {
+ unsigned *offset =
+ &state->atomic_counter_offsets[var->data.binding];
+
+ if (*offset % ATOMIC_COUNTER_SIZE)
+ _mesa_glsl_error(loc, state,
+ "misaligned atomic counter offset");
+
+ var->data.atomic.offset = *offset;
+ *offset += var->type->atomic_size();
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "atomic counters require explicit binding point");
+ }
+ } else if (var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+ }
+
+ /* Is the 'layout' keyword used with parameters that allow relaxed checking.
+ * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
+ * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
+ * allowed the layout qualifier to be used with 'varying' and 'attribute'.
+ * These extensions and all following extensions that add the 'layout'
+ * keyword have been modified to require the use of 'in' or 'out'.
+ *
+ * The following extension do not allow the deprecated keywords:
+ *
+ * GL_AMD_conservative_depth
+ * GL_ARB_conservative_depth
+ * GL_ARB_gpu_shader5
+ * GL_ARB_separate_shader_objects
+ * GL_ARB_tessellation_shader
+ * GL_ARB_transform_feedback3
+ * GL_ARB_uniform_buffer_object
+ *
+ * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
+ * allow layout with the deprecated keywords.
+ */
+ const bool relaxed_layout_qualifier_checking =
+ state->ARB_fragment_coord_conventions_enable;
+
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+ if (qual->has_layout() && uses_deprecated_qualifier) {
+ if (relaxed_layout_qualifier_checking) {
+ _mesa_glsl_warning(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ } else {
+ _mesa_glsl_error(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ }
+ }
+
+ /* Layout qualifiers for gl_FragDepth, which are enabled by extension
+ * AMD_conservative_depth.
+ */
+ int depth_layout_count = qual->flags.q.depth_any
+ + qual->flags.q.depth_greater
+ + qual->flags.q.depth_less
+ + qual->flags.q.depth_unchanged;
+ if (depth_layout_count > 0
+ && !state->AMD_conservative_depth_enable
+ && !state->ARB_conservative_depth_enable) {
+ _mesa_glsl_error(loc, state,
+ "extension GL_AMD_conservative_depth or "
+ "GL_ARB_conservative_depth must be enabled "
+ "to use depth layout qualifiers");
+ } else if (depth_layout_count > 0
+ && strcmp(var->name, "gl_FragDepth") != 0) {
+ _mesa_glsl_error(loc, state,
+ "depth layout qualifiers can be applied only to "
+ "gl_FragDepth");
+ } else if (depth_layout_count > 1
+ && strcmp(var->name, "gl_FragDepth") == 0) {
+ _mesa_glsl_error(loc, state,
+ "at most one depth layout qualifier can be applied to "
+ "gl_FragDepth");
+ }
+ if (qual->flags.q.depth_any)
+ var->data.depth_layout = ir_depth_layout_any;
+ else if (qual->flags.q.depth_greater)
+ var->data.depth_layout = ir_depth_layout_greater;
+ else if (qual->flags.q.depth_less)
+ var->data.depth_layout = ir_depth_layout_less;
+ else if (qual->flags.q.depth_unchanged)
+ var->data.depth_layout = ir_depth_layout_unchanged;
+ else
+ var->data.depth_layout = ir_depth_layout_none;
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(loc, state,
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ validate_matrix_layout_for_type(state, loc, var->type, var);
+ }
+
+ /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
+ * Inputs):
+ *
+ * "Fragment shaders also allow the following layout qualifier on in only
+ * (not with variable declarations)
+ * layout-qualifier-id
+ * early_fragment_tests
+ * [...]"
+ */
+ if (qual->flags.q.early_fragment_tests) {
+ _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
+ "valid in fragment shader input layout declaration.");
+ }
+}
+
+static void
apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
ir_variable *var,
struct _mesa_glsl_parse_state *state,
@@ -2992,11 +3275,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
select_gles_precision(qual->precision, var->type, state, loc);
}
- if (state->stage == MESA_SHADER_GEOMETRY &&
- qual->flags.q.out && qual->flags.q.stream) {
- var->data.stream = qual->stream;
- }
-
if (qual->flags.q.patch)
var->data.patch = 1;
@@ -3136,102 +3414,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode,
state, loc);
- var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
- var->data.origin_upper_left = qual->flags.q.origin_upper_left;
- if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
- && (strcmp(var->name, "gl_FragCoord") != 0)) {
- const char *const qual_string = (qual->flags.q.origin_upper_left)
- ? "origin_upper_left" : "pixel_center_integer";
-
- _mesa_glsl_error(loc, state,
- "layout qualifier `%s' can only be applied to "
- "fragment shader input `gl_FragCoord'",
- qual_string);
- }
-
- if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
-
- /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
- *
- * "Within any shader, the first redeclarations of gl_FragCoord
- * must appear before any use of gl_FragCoord."
- *
- * Generate a compiler error if above condition is not met by the
- * fragment shader.
- */
- ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
- if (earlier != NULL &&
- earlier->data.used &&
- !state->fs_redeclares_gl_fragcoord) {
- _mesa_glsl_error(loc, state,
- "gl_FragCoord used before its first redeclaration "
- "in fragment shader");
- }
-
- /* Make sure all gl_FragCoord redeclarations specify the same layout
- * qualifiers.
- */
- if (is_conflicting_fragcoord_redeclaration(state, qual)) {
- const char *const qual_string =
- get_layout_qualifier_string(qual->flags.q.origin_upper_left,
- qual->flags.q.pixel_center_integer);
-
- const char *const state_string =
- get_layout_qualifier_string(state->fs_origin_upper_left,
- state->fs_pixel_center_integer);
-
- _mesa_glsl_error(loc, state,
- "gl_FragCoord redeclared with different layout "
- "qualifiers (%s) and (%s) ",
- state_string,
- qual_string);
- }
- state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
- state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
- !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord =
- state->fs_origin_upper_left ||
- state->fs_pixel_center_integer ||
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
- }
-
- if (qual->flags.q.explicit_location) {
- validate_explicit_location(qual, var, state, loc);
- } else if (qual->flags.q.explicit_index) {
- _mesa_glsl_error(loc, state, "explicit index requires explicit location");
- }
-
- if (qual->flags.q.explicit_binding &&
- validate_binding_qualifier(state, loc, var->type, qual)) {
- var->data.explicit_binding = true;
- var->data.binding = qual->binding;
- }
-
- if (var->type->contains_atomic()) {
- if (var->data.mode == ir_var_uniform) {
- if (var->data.explicit_binding) {
- unsigned *offset =
- &state->atomic_counter_offsets[var->data.binding];
-
- if (*offset % ATOMIC_COUNTER_SIZE)
- _mesa_glsl_error(loc, state,
- "misaligned atomic counter offset");
-
- var->data.atomic.offset = *offset;
- *offset += var->type->atomic_size();
-
- } else {
- _mesa_glsl_error(loc, state,
- "atomic counters require explicit binding point");
- }
- } else if (var->data.mode != ir_var_function_in) {
- _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
- "function parameters or uniform-qualified "
- "global variables");
- }
- }
-
/* Does the declaration use the deprecated 'attribute' or 'varying'
* keywords?
*/
@@ -3267,114 +3449,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
"`out' or `varying' variables between shader stages");
}
-
- /* Is the 'layout' keyword used with parameters that allow relaxed checking.
- * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
- * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
- * allowed the layout qualifier to be used with 'varying' and 'attribute'.
- * These extensions and all following extensions that add the 'layout'
- * keyword have been modified to require the use of 'in' or 'out'.
- *
- * The following extension do not allow the deprecated keywords:
- *
- * GL_AMD_conservative_depth
- * GL_ARB_conservative_depth
- * GL_ARB_gpu_shader5
- * GL_ARB_separate_shader_objects
- * GL_ARB_tessellation_shader
- * GL_ARB_transform_feedback3
- * GL_ARB_uniform_buffer_object
- *
- * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
- * allow layout with the deprecated keywords.
- */
- const bool relaxed_layout_qualifier_checking =
- state->ARB_fragment_coord_conventions_enable;
-
- if (qual->has_layout() && uses_deprecated_qualifier) {
- if (relaxed_layout_qualifier_checking) {
- _mesa_glsl_warning(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- } else {
- _mesa_glsl_error(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- }
- }
-
- /* Layout qualifiers for gl_FragDepth, which are enabled by extension
- * AMD_conservative_depth.
- */
- int depth_layout_count = qual->flags.q.depth_any
- + qual->flags.q.depth_greater
- + qual->flags.q.depth_less
- + qual->flags.q.depth_unchanged;
- if (depth_layout_count > 0
- && !state->AMD_conservative_depth_enable
- && !state->ARB_conservative_depth_enable) {
- _mesa_glsl_error(loc, state,
- "extension GL_AMD_conservative_depth or "
- "GL_ARB_conservative_depth must be enabled "
- "to use depth layout qualifiers");
- } else if (depth_layout_count > 0
- && strcmp(var->name, "gl_FragDepth") != 0) {
- _mesa_glsl_error(loc, state,
- "depth layout qualifiers can be applied only to "
- "gl_FragDepth");
- } else if (depth_layout_count > 1
- && strcmp(var->name, "gl_FragDepth") == 0) {
- _mesa_glsl_error(loc, state,
- "at most one depth layout qualifier can be applied to "
- "gl_FragDepth");
- }
- if (qual->flags.q.depth_any)
- var->data.depth_layout = ir_depth_layout_any;
- else if (qual->flags.q.depth_greater)
- var->data.depth_layout = ir_depth_layout_greater;
- else if (qual->flags.q.depth_less)
- var->data.depth_layout = ir_depth_layout_less;
- else if (qual->flags.q.depth_unchanged)
- var->data.depth_layout = ir_depth_layout_unchanged;
- else
- var->data.depth_layout = ir_depth_layout_none;
-
- if (qual->flags.q.std140 ||
- qual->flags.q.std430 ||
- qual->flags.q.packed ||
- qual->flags.q.shared) {
- _mesa_glsl_error(loc, state,
- "uniform and shader storage block layout qualifiers "
- "std140, std430, packed, and shared can only be "
- "applied to uniform or shader storage blocks, not "
- "members");
- }
-
if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
_mesa_glsl_error(loc, state,
"the shared storage qualifiers can only be used with "
"compute shaders");
}
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- validate_matrix_layout_for_type(state, loc, var->type, var);
- }
-
apply_image_qualifier_to_variable(qual, var, state, loc);
-
- /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
- * Inputs):
- *
- * "Fragment shaders also allow the following layout qualifier on in only
- * (not with variable declarations)
- * layout-qualifier-id
- * early_fragment_tests
- * [...]"
- */
- if (qual->flags.q.early_fragment_tests) {
- _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
- "valid in fragment shader input layout declaration.");
- }
}
/**
@@ -3798,7 +3879,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
unsigned num_vertices = 0;
if (state->tcs_output_vertices_specified) {
- num_vertices = state->out_qualifier->vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices",
+ &num_vertices, false)) {
+ return;
+ }
+
+ if (num_vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", num_vertices);
+ return;
+ }
}
if (!var->type->is_array() && !var->data.patch) {
@@ -4032,9 +4123,18 @@ ast_declarator_list::hir(exec_list *instructions,
*/
if (decl_type && decl_type->contains_atomic()) {
if (type->qualifier.flags.q.explicit_binding &&
- type->qualifier.flags.q.explicit_offset)
- state->atomic_counter_offsets[type->qualifier.binding] =
- type->qualifier.offset;
+ type->qualifier.flags.q.explicit_offset) {
+ unsigned qual_binding;
+ unsigned qual_offset;
+ if (process_qualifier_constant(state, &loc, "binding",
+ type->qualifier.binding,
+ &qual_binding)
+ && process_qualifier_constant(state, &loc, "offset",
+ type->qualifier.offset,
+ &qual_offset)) {
+ state->atomic_counter_offsets[qual_binding] = qual_offset;
+ }
+ }
}
if (this->declarations.is_empty()) {
@@ -4188,6 +4288,8 @@ ast_declarator_list::hir(exec_list *instructions,
apply_type_qualifier_to_variable(& this->type->qualifier, var, state,
& loc, false);
+ apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
+ &loc);
if (this->type->qualifier.flags.q.invariant) {
if (!is_varying_var(var, state->stage)) {
@@ -4983,7 +5085,7 @@ ast_function::hir(exec_list *instructions,
/* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
* "No qualifier is allowed on the return type of a function."
*/
- if (this->return_type->has_qualifiers()) {
+ if (this->return_type->has_qualifiers(state)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"function `%s' return type has qualifiers", name);
@@ -5115,6 +5217,27 @@ ast_function::hir(exec_list *instructions,
if (this->return_type->qualifier.flags.q.subroutine_def) {
int idx;
+ if (this->return_type->qualifier.flags.q.explicit_index) {
+ unsigned qual_index;
+ if (process_qualifier_constant(state, &loc, "index",
+ this->return_type->qualifier.index,
+ &qual_index)) {
+ if (!state->has_explicit_uniform_location()) {
+ _mesa_glsl_error(&loc, state, "subroutine index requires "
+ "GL_ARB_explicit_uniform_location or "
+ "GLSL 4.30");
+ } else if (qual_index >= MAX_SUBROUTINES) {
+ _mesa_glsl_error(&loc, state,
+ "invalid subroutine index (%d) index must "
+ "be a number between 0 and "
+ "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+ MAX_SUBROUTINES - 1);
+ } else {
+ f->subroutine_index = qual_index;
+ }
+ }
+ }
+
f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
f->subroutine_types = ralloc_array(state, const struct glsl_type *,
f->num_subroutine_types);
@@ -6046,27 +6169,19 @@ ast_type_specifier::hir(exec_list *instructions,
* stored in \c *fields_ret.
*/
unsigned
-ast_process_structure_or_interface_block(exec_list *instructions,
- struct _mesa_glsl_parse_state *state,
- exec_list *declarations,
- YYLTYPE &loc,
- glsl_struct_field **fields_ret,
- bool is_interface,
- enum glsl_matrix_layout matrix_layout,
- bool allow_reserved_names,
- ir_variable_mode var_mode,
- ast_type_qualifier *layout)
+ast_process_struct_or_iface_block_members(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ enum glsl_matrix_layout matrix_layout,
+ bool allow_reserved_names,
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout,
+ unsigned block_stream)
{
unsigned decl_count = 0;
- /* For blocks that accept memory qualifiers (i.e. shader storage), verify
- * that we don't have incompatible qualifiers
- */
- if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
- _mesa_glsl_error(&loc, state,
- "Interface block sets both readonly and writeonly");
- }
-
/* Make an initial pass over the list of fields to determine how
* many there are. Each element in this list is an ast_declarator_list.
* This means that we actually need to count the number of elements in the
@@ -6087,6 +6202,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
unsigned i = 0;
foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
const char *type_name;
+ YYLTYPE loc = decl_list->get_location();
decl_list->type->specifier->hir(instructions, state);
@@ -6101,74 +6217,120 @@ ast_process_structure_or_interface_block(exec_list *instructions,
const glsl_type *decl_type =
decl_list->type->glsl_type(& type_name, state);
- foreach_list_typed (ast_declaration, decl, link,
- &decl_list->declarations) {
- if (!allow_reserved_names)
- validate_identifier(decl->identifier, loc, state);
+ const struct ast_type_qualifier *const qual =
+ &decl_list->type->qualifier;
- /* From section 4.3.9 of the GLSL 4.40 spec:
- *
- * "[In interface blocks] opaque types are not allowed."
+ /* From section 4.3.9 of the GLSL 4.40 spec:
+ *
+ * "[In interface blocks] opaque types are not allowed."
+ *
+ * It should be impossible for decl_type to be NULL here. Cases that
+ * might naturally lead to decl_type being NULL, especially for the
+ * is_interface case, will have resulted in compilation having
+ * already halted due to a syntax error.
+ */
+ assert(decl_type);
+
+ if (is_interface && decl_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/buffer in non-default interface block contains "
+ "opaque variable");
+ }
+
+ if (decl_type->contains_atomic()) {
+ /* From section 4.1.7.3 of the GLSL 4.40 spec:
*
- * It should be impossible for decl_type to be NULL here. Cases that
- * might naturally lead to decl_type being NULL, especially for the
- * is_interface case, will have resulted in compilation having
- * already halted due to a syntax error.
+ * "Members of structures cannot be declared as atomic counter
+ * types."
*/
- assert(decl_type);
+ _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
+ }
- if (is_interface && decl_type->contains_opaque()) {
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "uniform/buffer in non-default interface block contains "
- "opaque variable");
- }
+ if (decl_type->contains_image()) {
+ /* FINISHME: Same problem as with atomic counters.
+ * FINISHME: Request clarification from Khronos and add
+ * FINISHME: spec quotation here.
+ */
+ _mesa_glsl_error(&loc, state,
+ "image in structure, shader storage block or "
+ "uniform block");
+ }
- if (decl_type->contains_atomic()) {
- /* From section 4.1.7.3 of the GLSL 4.40 spec:
- *
- * "Members of structures cannot be declared as atomic counter
- * types."
- */
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state, "atomic counter in structure, "
- "shader storage block or uniform block");
- }
+ if (qual->flags.q.explicit_binding) {
+ _mesa_glsl_error(&loc, state,
+ "binding layout qualifier cannot be applied "
+ "to struct or interface block members");
+ }
- if (decl_type->contains_image()) {
- /* FINISHME: Same problem as with atomic counters.
- * FINISHME: Request clarification from Khronos and add
- * FINISHME: spec quotation here.
- */
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "image in structure, shader storage block or "
- "uniform block");
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.constant) {
+ _mesa_glsl_error(&loc, state,
+ "const storage qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+ *
+ * "A block member may be declared with a stream identifier, but
+ * the specified stream must match the stream associated with the
+ * containing block."
+ */
+ if (qual->flags.q.explicit_stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, &loc, "stream",
+ qual->stream, &qual_stream) &&
+ qual_stream != block_stream) {
+ _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+ "interface block member does not match "
+ "the interface block (%d vs %d)", qual->stream,
+ block_stream);
}
+ }
- const struct ast_type_qualifier *const qual =
- & decl_list->type->qualifier;
+ if (qual->flags.q.uniform && qual->has_interpolation()) {
+ _mesa_glsl_error(&loc, state,
+ "interpolation qualifiers cannot be used "
+ "with uniform interface blocks");
+ }
- if (qual->flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, decl_type, qual);
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
- if (qual->flags.q.std140 ||
- qual->flags.q.std430 ||
- qual->flags.q.packed ||
- qual->flags.q.shared) {
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(&loc, state,
- "uniform/shader storage block layout qualifiers "
- "std140, std430, packed, and shared can only be "
- "applied to uniform/shader storage blocks, not "
- "members");
- }
+ "row_major and column_major can only be "
+ "applied to interface blocks");
+ } else
+ validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+ }
- if (qual->flags.q.constant) {
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "const storage qualifier cannot be applied "
- "to struct or interface block members");
- }
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+ "readonly and writeonly.");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link,
+ &decl_list->declarations) {
+ YYLTYPE loc = decl->get_location();
+
+ if (!allow_reserved_names)
+ validate_identifier(decl->identifier, loc, state);
const struct glsl_type *field_type =
process_array_type(&loc, decl_type, decl->array_specifier, state);
@@ -6183,42 +6345,6 @@ ast_process_structure_or_interface_block(exec_list *instructions,
fields[i].patch = qual->flags.q.patch ? 1 : 0;
fields[i].precision = qual->precision;
- /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
- *
- * "A block member may be declared with a stream identifier, but
- * the specified stream must match the stream associated with the
- * containing block."
- */
- if (qual->flags.q.explicit_stream &&
- qual->stream != layout->stream) {
- _mesa_glsl_error(&loc, state, "stream layout qualifier on "
- "interface block member `%s' does not match "
- "the interface block (%d vs %d)",
- fields[i].name, qual->stream, layout->stream);
- }
-
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
- _mesa_glsl_error(&loc, state,
- "row_major and column_major can only be "
- "applied to interface blocks");
- } else
- validate_matrix_layout_for_type(state, &loc, field_type, NULL);
- }
-
- if (qual->flags.q.uniform && qual->has_interpolation()) {
- _mesa_glsl_error(&loc, state,
- "interpolation qualifiers cannot be used "
- "with uniform interface blocks");
- }
-
- if ((qual->flags.q.uniform || !is_interface) &&
- qual->has_auxiliary_storage()) {
- _mesa_glsl_error(&loc, state,
- "auxiliary storage qualifiers cannot be used "
- "in uniform blocks or structures.");
- }
-
+         /* Propagate row- / column-major information down the fields of the
* structure or interface block. Structures need this data because
* the structure may contain a structure that contains ... a matrix
@@ -6248,29 +6374,20 @@ ast_process_structure_or_interface_block(exec_list *instructions,
* be defined inside shader storage buffer objects
*/
if (layout && var_mode == ir_var_shader_storage) {
- if (qual->flags.q.read_only && qual->flags.q.write_only) {
- _mesa_glsl_error(&loc, state,
- "buffer variable `%s' can't be "
- "readonly and writeonly.", fields[i].name);
- }
-
/* For readonly and writeonly qualifiers the field definition,
* if set, overwrites the layout qualifier.
*/
- bool read_only = layout->flags.q.read_only;
- bool write_only = layout->flags.q.write_only;
-
if (qual->flags.q.read_only) {
- read_only = true;
- write_only = false;
+ fields[i].image_read_only = true;
+ fields[i].image_write_only = false;
} else if (qual->flags.q.write_only) {
- read_only = false;
- write_only = true;
+ fields[i].image_read_only = false;
+ fields[i].image_write_only = true;
+ } else {
+ fields[i].image_read_only = layout->flags.q.read_only;
+ fields[i].image_write_only = layout->flags.q.write_only;
}
- fields[i].image_read_only = read_only;
- fields[i].image_write_only = write_only;
-
/* For other qualifiers, we set the flag if either the layout
* qualifier or the field qualifier are set
*/
@@ -6328,16 +6445,16 @@ ast_struct_specifier::hir(exec_list *instructions,
glsl_struct_field *fields;
unsigned decl_count =
- ast_process_structure_or_interface_block(instructions,
- state,
- &this->declarations,
- loc,
- &fields,
- false,
- GLSL_MATRIX_LAYOUT_INHERITED,
- false /* allow_reserved_names */,
- ir_var_auto,
- NULL);
+ ast_process_struct_or_iface_block_members(instructions,
+ state,
+ &this->declarations,
+ &fields,
+ false,
+ GLSL_MATRIX_LAYOUT_INHERITED,
+ false /* allow_reserved_names */,
+ ir_var_auto,
+ NULL,
+ 0 /* for interface only */);
validate_identifier(this->name, loc, state);
@@ -6483,17 +6600,36 @@ ast_interface_block::hir(exec_list *instructions,
*/
state->struct_specifier_depth++;
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
+ unsigned qual_stream;
+ if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+ &qual_stream) ||
+ !validate_stream_qualifier(&loc, state, qual_stream)) {
+ /* If the stream qualifier is invalid it doesn't make sense to continue
+ * on and try to compare stream layouts on member variables against it
+ * so just return early.
+ */
+ return NULL;
+ }
+
unsigned int num_variables =
- ast_process_structure_or_interface_block(&declared_variables,
- state,
- &this->declarations,
- loc,
- &fields,
- true,
- matrix_layout,
- redeclaring_per_vertex,
- var_mode,
- &this->layout);
+ ast_process_struct_or_iface_block_members(&declared_variables,
+ state,
+ &this->declarations,
+ &fields,
+ true,
+ matrix_layout,
+ redeclaring_per_vertex,
+ var_mode,
+ &this->layout,
+ qual_stream);
state->struct_specifier_depth--;
@@ -6604,6 +6740,8 @@ ast_interface_block::hir(exec_list *instructions,
earlier_per_vertex->fields.structure[j].sample;
fields[i].patch =
earlier_per_vertex->fields.structure[j].patch;
+ fields[i].precision =
+ earlier_per_vertex->fields.structure[j].precision;
}
}
@@ -6633,8 +6771,6 @@ ast_interface_block::hir(exec_list *instructions,
num_variables,
packing,
this->block_name);
- if (this->layout.flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, block_type, &this->layout);
if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
YYLTYPE loc = this->get_location();
@@ -6765,10 +6901,6 @@ ast_interface_block::hir(exec_list *instructions,
"not allowed");
}
- if (this->layout.flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, block_array_type,
- &this->layout);
-
var = new(state) ir_variable(block_array_type,
this->instance_name,
var_mode);
@@ -6830,14 +6962,12 @@ ast_interface_block::hir(exec_list *instructions,
earlier->reinit_interface_type(block_type);
delete var;
} else {
- /* Propagate the "binding" keyword into this UBO's fields;
- * the UBO declaration itself doesn't get an ir_variable unless it
- * has an instance name. This is ugly.
- */
- var->data.explicit_binding = this->layout.flags.q.explicit_binding;
- var->data.binding = this->layout.binding;
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var, var->type,
+ &this->layout);
+ }
- var->data.stream = this->layout.stream;
+ var->data.stream = qual_stream;
state->symbols->add_variable(var);
instructions->push_tail(var);
@@ -6857,7 +6987,7 @@ ast_interface_block::hir(exec_list *instructions,
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
- var->data.stream = this->layout.stream;
+ var->data.stream = qual_stream;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@@ -6914,8 +7044,10 @@ ast_interface_block::hir(exec_list *instructions,
* The UBO declaration itself doesn't get an ir_variable unless it
* has an instance name. This is ugly.
*/
- var->data.explicit_binding = this->layout.flags.q.explicit_binding;
- var->data.binding = this->layout.binding;
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var,
+ var->get_interface_type(), &this->layout);
+ }
if (var->type->is_unsized_array()) {
if (var->is_in_shader_storage_block()) {
@@ -6997,22 +7129,18 @@ ast_tcs_output_layout::hir(exec_list *instructions,
{
YYLTYPE loc = this->get_location();
- /* If any tessellation control output layout declaration preceded this
- * one, make sure it was consistent with this one.
- */
- if (state->tcs_output_vertices_specified &&
- state->out_qualifier->vertices != this->vertices) {
- _mesa_glsl_error(&loc, state,
- "tessellation control shader output layout does not "
- "match previous declaration");
- return NULL;
+ unsigned num_vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &num_vertices,
+ false)) {
+ /* return here to stop cascading incorrect error messages */
+ return NULL;
}
/* If any shader outputs occurred before this declaration and specified an
* array size, make sure the size they specified is consistent with the
* primitive type.
*/
- unsigned num_vertices = this->vertices;
if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
_mesa_glsl_error(&loc, state,
"this tessellation control shader output layout "
@@ -7120,20 +7248,6 @@ ast_cs_input_layout::hir(exec_list *instructions,
{
YYLTYPE loc = this->get_location();
- /* If any compute input layout declaration preceded this one, make sure it
- * was consistent with this one.
- */
- if (state->cs_input_local_size_specified) {
- for (int i = 0; i < 3; i++) {
- if (state->cs_input_local_size[i] != this->local_size[i]) {
- _mesa_glsl_error(&loc, state,
- "compute shader input layout does not match"
- " previous declaration");
- return NULL;
- }
- }
- }
-
/* From the ARB_compute_shader specification:
*
* If the local size of the shader in any dimension is greater
@@ -7146,15 +7260,30 @@ ast_cs_input_layout::hir(exec_list *instructions,
* report it at compile time as well.
*/
GLuint64 total_invocations = 1;
+ unsigned qual_local_size[3];
for (int i = 0; i < 3; i++) {
- if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+
+ char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c",
+ 'x' + i);
+ /* Infer a local_size of 1 for unspecified dimensions */
+ if (this->local_size[i] == NULL) {
+ qual_local_size[i] = 1;
+ } else if (!this->local_size[i]->
+ process_qualifier_constant(state, local_size_str,
+ &qual_local_size[i], false)) {
+ ralloc_free(local_size_str);
+ return NULL;
+ }
+ ralloc_free(local_size_str);
+
+ if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
_mesa_glsl_error(&loc, state,
"local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
" (%d)", 'x' + i,
state->ctx->Const.MaxComputeWorkGroupSize[i]);
break;
}
- total_invocations *= this->local_size[i];
+ total_invocations *= qual_local_size[i];
if (total_invocations >
state->ctx->Const.MaxComputeWorkGroupInvocations) {
_mesa_glsl_error(&loc, state,
@@ -7165,9 +7294,23 @@ ast_cs_input_layout::hir(exec_list *instructions,
}
}
+ /* If any compute input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++) {
+ if (state->cs_input_local_size[i] != qual_local_size[i]) {
+ _mesa_glsl_error(&loc, state,
+ "compute shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+ }
+ }
+
state->cs_input_local_size_specified = true;
for (int i = 0; i < 3; i++)
- state->cs_input_local_size[i] = this->local_size[i];
+ state->cs_input_local_size[i] = qual_local_size[i];
/* We may now declare the built-in constant gl_WorkGroupSize (see
* builtin_variable_generator::generate_constants() for why we didn't
@@ -7182,7 +7325,7 @@ ast_cs_input_layout::hir(exec_list *instructions,
ir_constant_data data;
memset(&data, 0, sizeof(data));
for (int i = 0; i < 3; i++)
- data.u[i] = this->local_size[i];
+ data.u[i] = qual_local_size[i];
var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
var->constant_initializer =
new(var) ir_constant(glsl_type::uvec3_type, &data);
@@ -7198,6 +7341,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
{
bool gl_FragColor_assigned = false;
bool gl_FragData_assigned = false;
+ bool gl_FragSecondaryColor_assigned = false;
+ bool gl_FragSecondaryData_assigned = false;
bool user_defined_fs_output_assigned = false;
ir_variable *user_defined_fs_output = NULL;
@@ -7215,6 +7360,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
gl_FragColor_assigned = true;
else if (strcmp(var->name, "gl_FragData") == 0)
gl_FragData_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0)
+ gl_FragSecondaryColor_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0)
+ gl_FragSecondaryData_assigned = true;
else if (!is_gl_identifier(var->name)) {
if (state->stage == MESA_SHADER_FRAGMENT &&
var->data.mode == ir_var_shader_out) {
@@ -7246,11 +7395,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
_mesa_glsl_error(&loc, state, "fragment shader writes to both "
"`gl_FragColor' and `%s'",
user_defined_fs_output->name);
+ } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragSecondaryColorEXT' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and"
+ " `gl_FragSecondaryColorEXT'");
} else if (gl_FragData_assigned && user_defined_fs_output_assigned) {
_mesa_glsl_error(&loc, state, "fragment shader writes to both "
"`gl_FragData' and `%s'",
user_defined_fs_output->name);
}
+
+ if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) &&
+ !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(&loc, state,
+ "Dual source blending requires EXT_blend_func_extended");
+ }
}
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 79134c19893..03ed4dcfa2a 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -38,13 +38,16 @@ ast_type_specifier::print(void) const
}
bool
-ast_fully_specified_type::has_qualifiers() const
+ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const
{
/* 'subroutine' isnt a real qualifier. */
ast_type_qualifier subroutine_only;
subroutine_only.flags.i = 0;
subroutine_only.flags.q.subroutine = 1;
subroutine_only.flags.q.subroutine_def = 1;
+ if (state->has_explicit_uniform_location()) {
+ subroutine_only.flags.q.explicit_index = 1;
+ }
return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
}
@@ -169,41 +172,32 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
if (q.flags.q.max_vertices) {
- if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) {
+ if (this->max_vertices) {
+ this->max_vertices->merge_qualifier(q.max_vertices);
+ } else {
+ this->max_vertices = q.max_vertices;
+ }
+ }
+
+ if (q.flags.q.subroutine_def) {
+ if (this->flags.q.subroutine_def) {
_mesa_glsl_error(loc, state,
- "geometry shader set conflicting max_vertices "
- "(%d and %d)", this->max_vertices, q.max_vertices);
- return false;
+ "conflicting subroutine qualifiers used");
+ } else {
+ this->subroutine_list = q.subroutine_list;
}
- this->max_vertices = q.max_vertices;
}
if (q.flags.q.invocations) {
- if (this->flags.q.invocations && this->invocations != q.invocations) {
- _mesa_glsl_error(loc, state,
- "geometry shader set conflicting invocations "
- "(%d and %d)", this->invocations, q.invocations);
- return false;
+ if (this->invocations) {
+ this->invocations->merge_qualifier(q.invocations);
+ } else {
+ this->invocations = q.invocations;
}
- this->invocations = q.invocations;
}
if (state->stage == MESA_SHADER_GEOMETRY &&
state->has_explicit_attrib_stream()) {
- if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) {
- _mesa_glsl_error(loc, state,
- "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
- "(%d > %d)",
- q.stream, state->ctx->Const.MaxVertexStreams - 1);
- }
- if (this->flags.q.explicit_stream &&
- this->stream >= state->ctx->Const.MaxVertexStreams) {
- _mesa_glsl_error(loc, state,
- "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
- "(%d > %d)",
- this->stream, state->ctx->Const.MaxVertexStreams - 1);
- }
-
if (!this->flags.q.explicit_stream) {
if (q.flags.q.stream) {
this->flags.q.stream = 1;
@@ -222,14 +216,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
if (q.flags.q.vertices) {
- if (this->flags.q.vertices && this->vertices != q.vertices) {
- _mesa_glsl_error(loc, state,
- "tessellation control shader set conflicting "
- "vertices (%d and %d)",
- this->vertices, q.vertices);
- return false;
+ if (this->vertices) {
+ this->vertices->merge_qualifier(q.vertices);
+ } else {
+ this->vertices = q.vertices;
}
- this->vertices = q.vertices;
}
if (q.flags.q.vertex_spacing) {
@@ -266,15 +257,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
for (int i = 0; i < 3; i++) {
if (q.flags.q.local_size & (1 << i)) {
- if ((this->flags.q.local_size & (1 << i)) &&
- this->local_size[i] != q.local_size[i]) {
- _mesa_glsl_error(loc, state,
- "compute shader set conflicting values for "
- "local_size_%c (%d and %d)", 'x' + i,
- this->local_size[i], q.local_size[i]);
- return false;
+ if (this->local_size[i]) {
+ this->local_size[i]->merge_qualifier(q.local_size[i]);
+ } else {
+ this->local_size[i] = q.local_size[i];
}
- this->local_size[i] = q.local_size[i];
}
}
@@ -313,7 +300,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
const bool r = this->merge_qualifier(loc, state, q);
if (state->stage == MESA_SHADER_TESS_CTRL) {
- node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices);
+ node = new(mem_ctx) ast_tcs_output_layout(*loc);
}
return r;
@@ -417,15 +404,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
state->in_qualifier->prim_type = q.prim_type;
}
- if (this->flags.q.invocations &&
- q.flags.q.invocations &&
- this->invocations != q.invocations) {
- _mesa_glsl_error(loc, state,
- "conflicting invocations counts specified");
- return false;
- } else if (q.flags.q.invocations) {
+ if (q.flags.q.invocations) {
this->flags.q.invocations = 1;
- this->invocations = q.invocations;
+ if (this->invocations) {
+ this->invocations->merge_qualifier(q.invocations);
+ } else {
+ this->invocations = q.invocations;
+ }
}
if (q.flags.q.early_fragment_tests) {
@@ -468,15 +453,67 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
if (create_gs_ast) {
node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
} else if (create_cs_ast) {
- /* Infer a local_size of 1 for every unspecified dimension */
- unsigned local_size[3];
- for (int i = 0; i < 3; i++) {
- if (q.flags.q.local_size & (1 << i))
- local_size[i] = q.local_size[i];
- else
- local_size[i] = 1;
+ node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size);
+ }
+
+ return true;
+}
+
+bool
+ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+ const char *qual_indentifier,
+ unsigned *value,
+ bool can_be_zero)
+{
+ int min_value = 0;
+ bool first_pass = true;
+ *value = 0;
+
+ if (!can_be_zero)
+ min_value = 1;
+
+ for (exec_node *node = layout_const_expressions.head;
+ !node->is_tail_sentinel(); node = node->next) {
+
+ exec_list dummy_instructions;
+ ast_node *const_expression = exec_node_data(ast_node, node, link);
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < min_value) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid "
+ "(%d < %d)", qual_indentifier,
+ const_int->value.i[0], min_value);
+ return false;
}
- node = new(mem_ctx) ast_cs_input_layout(*loc, local_size);
+
+ if (!first_pass && *value != const_int->value.u[0]) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s layout qualifier does not "
+ "match previous declaration (%d vs %d)",
+ qual_indentifier, *value, const_int->value.i[0]);
+ return false;
+ } else {
+ first_pass = false;
+ *value = const_int->value.u[0];
+ }
+
+ /* If the location is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the location isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
}
return true;
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 13494446b59..881ee2b6b55 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -290,6 +290,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state)
}
static bool
+texture_samples_identical(const _mesa_glsl_parse_state *state)
+{
+ return texture_multisample(state) &&
+ state->EXT_shader_samples_identical_enable;
+}
+
+static bool
+texture_samples_identical_array(const _mesa_glsl_parse_state *state)
+{
+ return texture_multisample_array(state) &&
+ state->EXT_shader_samples_identical_enable;
+}
+
+static bool
fs_texture_cube_map_array(const _mesa_glsl_parse_state *state)
{
return state->stage == MESA_SHADER_FRAGMENT &&
@@ -724,6 +738,7 @@ private:
BA2(textureQueryLod);
B1(textureQueryLevels);
+ BA2(textureSamplesIdentical);
B1(dFdx);
B1(dFdy);
B1(fwidth);
@@ -2210,6 +2225,16 @@ builtin_builder::create_builtins()
NULL);
+ add_function("textureSamplesIdenticalEXT",
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type),
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
+
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type),
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
+ NULL);
+
add_function("texture1D",
_texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
_texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
@@ -3573,7 +3598,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type
ir_constant_data infinities;
for (int i = 0; i < type->vector_elements; i++) {
- infinities.f[i] = INFINITY;
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ infinities.f[i] = INFINITY;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ infinities.d[i] = INFINITY;
+ break;
+ default:
+ unreachable("unknown type");
+ }
}
body.emit(ret(equal(abs(x), imm(type, infinities))));
@@ -4675,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type)
return sig;
}
+ir_function_signature *
+builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail,
+ const glsl_type *sampler_type,
+ const glsl_type *coord_type)
+{
+ ir_variable *s = in_var(sampler_type, "sampler");
+ ir_variable *P = in_var(coord_type, "P");
+ const glsl_type *return_type = glsl_type::bool_type;
+ MAKE_SIG(return_type, avail, 2, s, P);
+
+ ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical);
+ tex->coordinate = var_ref(P);
+ tex->set_sampler(var_ref(s), return_type);
+
+ body.emit(ret(tex));
+
+ return sig;
+}
+
UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives)
UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control)
UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control)
@@ -5243,8 +5296,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type,
ir_function_signature *
builtin_builder::_image_samples_prototype(const glsl_type *image_type,
- unsigned num_arguments,
- unsigned flags)
+ unsigned /* num_arguments */,
+ unsigned /* flags */)
{
ir_variable *image = in_var(image_type, "image");
ir_function_signature *sig =
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b06c1bc5c12..e8eab808a19 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].centroid = 0;
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
+ this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
this->num_fields++;
}
@@ -376,6 +377,11 @@ private:
return add_variable(name, type, ir_var_shader_out, slot);
}
+ ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name)
+ {
+ return add_index_variable(name, type, ir_var_shader_out, slot, index);
+ }
+
ir_variable *add_system_value(int slot, const glsl_type *type,
const char *name)
{
@@ -384,6 +390,8 @@ private:
ir_variable *add_variable(const char *name, const glsl_type *type,
enum ir_variable_mode mode, int slot);
+ ir_variable *add_index_variable(const char *name, const glsl_type *type,
+ enum ir_variable_mode mode, int slot, int index);
ir_variable *add_uniform(const glsl_type *type, const char *name);
ir_variable *add_const(const char *name, int value);
ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
@@ -429,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator(
{
}
+ir_variable *
+builtin_variable_generator::add_index_variable(const char *name,
+ const glsl_type *type,
+ enum ir_variable_mode mode, int slot, int index)
+{
+ ir_variable *var = new(symtab) ir_variable(type, name, mode);
+ var->data.how_declared = ir_var_declared_implicitly;
+
+ switch (var->data.mode) {
+ case ir_var_auto:
+ case ir_var_shader_in:
+ case ir_var_uniform:
+ case ir_var_system_value:
+ var->data.read_only = true;
+ break;
+ case ir_var_shader_out:
+ case ir_var_shader_storage:
+ break;
+ default:
+ /* The only variables that are added using this function should be
+ * uniforms, shader storage, shader inputs, and shader outputs, constants
+ * (which use ir_var_auto), and system values.
+ */
+ assert(0);
+ break;
+ }
+
+ var->data.location = slot;
+ var->data.explicit_location = (slot >= 0);
+ var->data.explicit_index = 1;
+ var->data.index = index;
+
+ /* Once the variable is created an initialized, add it to the symbol table
+ * and add the declaration to the IR stream.
+ */
+ instructions->push_tail(var);
+
+ symtab->add_variable(var);
+ return var;
+}
ir_variable *
builtin_variable_generator::add_variable(const char *name,
@@ -580,6 +628,14 @@ builtin_variable_generator::generate_constants()
add_const("gl_MaxVaryingVectors",
state->ctx->Const.MaxVarying);
}
+
+ /* EXT_blend_func_extended brings a built in constant
+ * for determining number of dual source draw buffers
+ */
+ if (state->EXT_blend_func_extended_enable) {
+ add_const("gl_MaxDualSourceDrawBuffersEXT",
+ state->Const.MaxDualSourceDrawBuffers);
+ }
} else {
add_const("gl_MaxVertexUniformComponents",
state->Const.MaxVertexUniformComponents);
@@ -1016,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars()
array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData");
}
+ if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) {
+ /* We make an assumption here that there will only ever be one dual-source draw buffer
+ * In case this assumption is ever proven to be false, make sure to assert here
+ * since we don't handle this case.
+ * In practice, this issue will never arise since no hardware will support it.
+ */
+ assert(state->Const.MaxDualSourceDrawBuffers <= 1);
+ add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT");
+ add_index_output(FRAG_RESULT_DATA0, 1,
+ array(vec4_t, state->Const.MaxDualSourceDrawBuffers),
+ "gl_SecondaryFragDataEXT");
+ }
+
/* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL
* ES 1.00.
*/
@@ -1186,6 +1255,7 @@ builtin_variable_generator::generate_varyings()
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
+ var->data.precision = fields[i].precision;
var->init_interface_type(per_vertex_out_type);
}
}
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 6aa7abec00e..2fd4cf04079 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
if (extensions->ARB_texture_multisample)
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
+ if (extensions->ARB_blend_func_extended)
+ add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
@@ -2510,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
if (extensions != NULL) {
if (extensions->EXT_shader_integer_mix)
add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+
+ if (extensions->EXT_shader_samples_identical)
+ add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1);
}
if (version >= 150)
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index adf6a05acce..5a8f98019d1 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%type <node> conditionopt
%type <node> for_init_statement
%type <for_rest_statement> for_rest_statement
-%type <n> integer_constant
%type <node> layout_defaults
%right THEN ELSE
@@ -1152,11 +1151,6 @@ layout_qualifier_id_list:
}
;
-integer_constant:
- INTCONSTANT { $$ = $1; }
- | UINTCONSTANT { $$ = $1; }
- ;
-
layout_qualifier_id:
any_identifier
{
@@ -1453,9 +1447,18 @@ layout_qualifier_id:
YYERROR;
}
}
- | any_identifier '=' integer_constant
+ | any_identifier '=' constant_expression
{
memset(& $$, 0, sizeof($$));
+ void *ctx = state;
+
+ if ($3->oper != ast_int_constant &&
+ $3->oper != ast_uint_constant &&
+ !state->has_enhanced_layouts()) {
+ _mesa_glsl_error(& @1, state,
+ "compile-time constant expressions require "
+ "GLSL 4.40 or ARB_enhanced_layouts");
+ }
if (match_layout_qualifier("location", $1, state) == 0) {
$$.flags.q.explicit_location = 1;
@@ -1466,24 +1469,17 @@ layout_qualifier_id:
"GL_ARB_explicit_attrib_location layout "
"identifier `%s' used", $1);
}
-
- if ($3 >= 0) {
- $$.location = $3;
- } else {
- _mesa_glsl_error(& @3, state, "invalid location %d specified", $3);
- YYERROR;
- }
+ $$.location = $3;
}
if (match_layout_qualifier("index", $1, state) == 0) {
- $$.flags.q.explicit_index = 1;
-
- if ($3 >= 0) {
- $$.index = $3;
- } else {
- _mesa_glsl_error(& @3, state, "invalid index %d specified", $3);
+ if (state->es_shader && !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended");
YYERROR;
}
+
+ $$.flags.q.explicit_index = 1;
+ $$.index = $3;
}
if ((state->has_420pack() ||
@@ -1502,18 +1498,11 @@ layout_qualifier_id:
if (match_layout_qualifier("max_vertices", $1, state) == 0) {
$$.flags.q.max_vertices = 1;
-
- if ($3 < 0) {
+ $$.max_vertices = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->is_version(150, 0)) {
_mesa_glsl_error(& @3, state,
- "invalid max_vertices %d specified", $3);
- YYERROR;
- } else {
- $$.max_vertices = $3;
- if (!state->is_version(150, 0)) {
- _mesa_glsl_error(& @3, state,
- "#version 150 max_vertices qualifier "
- "specified", $3);
- }
+ "#version 150 max_vertices qualifier "
+ "specified", $3);
}
}
@@ -1521,15 +1510,8 @@ layout_qualifier_id:
if (match_layout_qualifier("stream", $1, state) == 0 &&
state->check_explicit_attrib_stream_allowed(& @3)) {
$$.flags.q.stream = 1;
-
- if ($3 < 0) {
- _mesa_glsl_error(& @3, state,
- "invalid stream %d specified", $3);
- YYERROR;
- } else {
- $$.flags.q.explicit_stream = 1;
- $$.stream = $3;
- }
+ $$.flags.q.explicit_stream = 1;
+ $$.stream = $3;
}
}
@@ -1541,12 +1523,7 @@ layout_qualifier_id:
for (int i = 0; i < 3; i++) {
if (match_layout_qualifier(local_size_qualifiers[i], $1,
state) == 0) {
- if ($3 <= 0) {
- _mesa_glsl_error(& @3, state,
- "invalid %s of %d specified",
- local_size_qualifiers[i], $3);
- YYERROR;
- } else if (!state->has_compute_shader()) {
+ if (!state->has_compute_shader()) {
_mesa_glsl_error(& @3, state,
"%s qualifier requires GLSL 4.30 or "
"GLSL ES 3.10 or ARB_compute_shader",
@@ -1554,7 +1531,7 @@ layout_qualifier_id:
YYERROR;
} else {
$$.flags.q.local_size |= (1 << i);
- $$.local_size[i] = $3;
+ $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3);
}
break;
}
@@ -1562,48 +1539,24 @@ layout_qualifier_id:
if (match_layout_qualifier("invocations", $1, state) == 0) {
$$.flags.q.invocations = 1;
-
- if ($3 <= 0) {
+ $$.invocations = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->is_version(400, 0) &&
+ !state->ARB_gpu_shader5_enable) {
_mesa_glsl_error(& @3, state,
- "invalid invocations %d specified", $3);
- YYERROR;
- } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) {
- _mesa_glsl_error(& @3, state,
- "invocations (%d) exceeds "
- "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3);
- YYERROR;
- } else {
- $$.invocations = $3;
- if (!state->is_version(400, 0) &&
- !state->ARB_gpu_shader5_enable) {
- _mesa_glsl_error(& @3, state,
- "GL_ARB_gpu_shader5 invocations "
- "qualifier specified", $3);
- }
+ "GL_ARB_gpu_shader5 invocations "
+ "qualifier specified", $3);
}
}
/* Layout qualifiers for tessellation control shaders. */
if (match_layout_qualifier("vertices", $1, state) == 0) {
$$.flags.q.vertices = 1;
-
- if ($3 <= 0) {
- _mesa_glsl_error(& @3, state,
- "invalid vertices (%d) specified", $3);
- YYERROR;
- } else if ($3 > (int)state->Const.MaxPatchVertices) {
- _mesa_glsl_error(& @3, state,
- "vertices (%d) exceeds "
- "GL_MAX_PATCH_VERTICES", $3);
- YYERROR;
- } else {
- $$.vertices = $3;
- if (!state->ARB_tessellation_shader_enable &&
- !state->is_version(400, 0)) {
- _mesa_glsl_error(& @1, state,
- "vertices qualifier requires GLSL 4.00 or "
- "ARB_tessellation_shader");
- }
+ $$.vertices = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->ARB_tessellation_shader_enable &&
+ !state->is_version(400, 0)) {
+ _mesa_glsl_error(& @1, state,
+ "vertices qualifier requires GLSL 4.00 or "
+ "ARB_tessellation_shader");
}
}
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 02584c62a4d..b41b64af2c1 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -104,6 +104,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
+ this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers;
+
/* 1.50 constants */
this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents;
@@ -646,9 +648,11 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(AMD_shader_trinary_minmax, true, false, dummy_true),
EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer),
EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index),
+ EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended),
EXT(EXT_draw_buffers, false, true, dummy_true),
EXT(EXT_separate_shader_objects, false, true, dummy_true),
EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix),
+ EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical),
EXT(EXT_texture_array, true, false, EXT_texture_array),
};
@@ -1646,8 +1650,20 @@ set_shader_inout_layout(struct gl_shader *shader,
switch (shader->Stage) {
case MESA_SHADER_TESS_CTRL:
shader->TessCtrl.VerticesOut = 0;
- if (state->tcs_output_vertices_specified)
- shader->TessCtrl.VerticesOut = state->out_qualifier->vertices;
+ if (state->tcs_output_vertices_specified) {
+ unsigned vertices;
+ if (state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &vertices,
+ false)) {
+
+ YYLTYPE loc = state->out_qualifier->vertices->get_location();
+ if (vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", vertices);
+ }
+ shader->TessCtrl.VerticesOut = vertices;
+ }
+ }
break;
case MESA_SHADER_TESS_EVAL:
shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
@@ -1668,8 +1684,14 @@ set_shader_inout_layout(struct gl_shader *shader,
break;
case MESA_SHADER_GEOMETRY:
shader->Geom.VerticesOut = 0;
- if (state->out_qualifier->flags.q.max_vertices)
- shader->Geom.VerticesOut = state->out_qualifier->max_vertices;
+ if (state->out_qualifier->flags.q.max_vertices) {
+ unsigned qual_max_vertices;
+ if (state->out_qualifier->max_vertices->
+ process_qualifier_constant(state, "max_vertices",
+ &qual_max_vertices, true)) {
+ shader->Geom.VerticesOut = qual_max_vertices;
+ }
+ }
if (state->gs_input_prim_type_specified) {
shader->Geom.InputType = state->in_qualifier->prim_type;
@@ -1684,8 +1706,22 @@ set_shader_inout_layout(struct gl_shader *shader,
}
shader->Geom.Invocations = 0;
- if (state->in_qualifier->flags.q.invocations)
- shader->Geom.Invocations = state->in_qualifier->invocations;
+ if (state->in_qualifier->flags.q.invocations) {
+ unsigned invocations;
+ if (state->in_qualifier->invocations->
+ process_qualifier_constant(state, "invocations",
+ &invocations, false)) {
+
+ YYLTYPE loc = state->in_qualifier->invocations->get_location();
+ if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) {
+ _mesa_glsl_error(&loc, state,
+ "invocations (%d) exceeds "
+ "GL_MAX_GEOMETRY_SHADER_INVOCATIONS",
+ invocations);
+ }
+ shader->Geom.Invocations = invocations;
+ }
+ }
break;
case MESA_SHADER_COMPUTE:
@@ -1797,6 +1833,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
if (shader->InfoLog)
ralloc_free(shader->InfoLog);
+ if (!state->error)
+ set_shader_inout_layout(shader, state);
+
shader->symbols = new(shader->ir) glsl_symbol_table;
shader->CompileStatus = !state->error;
shader->InfoLog = state->info_log;
@@ -1804,9 +1843,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
shader->IsES = state->es_shader;
shader->uses_builtin_functions = state->uses_builtin_functions;
- if (!state->error)
- set_shader_inout_layout(shader, state);
-
/* Retain any live IR, but trash the rest. */
reparent_ir(shader->ir, shader->ir);
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 1d8c1b8799f..17ff0b5af79 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state {
/* ARB_draw_buffers */
unsigned MaxDrawBuffers;
+ /* ARB_blend_func_extended */
+ unsigned MaxDualSourceDrawBuffers;
+
/* 3.00 ES */
int MinProgramTexelOffset;
int MaxProgramTexelOffset;
@@ -595,12 +598,16 @@ struct _mesa_glsl_parse_state {
bool AMD_vertex_shader_layer_warn;
bool AMD_vertex_shader_viewport_index_enable;
bool AMD_vertex_shader_viewport_index_warn;
+ bool EXT_blend_func_extended_enable;
+ bool EXT_blend_func_extended_warn;
bool EXT_draw_buffers_enable;
bool EXT_draw_buffers_warn;
bool EXT_separate_shader_objects_enable;
bool EXT_separate_shader_objects_warn;
bool EXT_shader_integer_mix_enable;
bool EXT_shader_integer_mix_warn;
+ bool EXT_shader_samples_identical_enable;
+ bool EXT_shader_samples_identical_warn;
bool EXT_texture_array_enable;
bool EXT_texture_array_warn;
/*@}*/
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 8933b230177..ca520f547a1 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1421,12 +1421,11 @@ ir_dereference::is_lvalue() const
}
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" };
const char *ir_texture::opcode_string()
{
- assert((unsigned int) op <=
- sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
+ assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
return tex_opcode_strs[op];
}
@@ -1456,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
} else if (this->op == ir_lod) {
assert(type->vector_elements == 2);
assert(type->base_type == GLSL_TYPE_FLOAT);
+ } else if (this->op == ir_samples_identical) {
+ assert(type == glsl_type::bool_type);
+ assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+ assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
} else {
assert(sampler->type->sampler_type == (int) type->base_type);
if (sampler->type->sampler_shadow)
@@ -1676,6 +1679,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->data.interpolation = INTERP_QUALIFIER_NONE;
this->data.max_array_access = 0;
this->data.atomic.offset = 0;
+ this->data.precision = GLSL_PRECISION_NONE;
this->data.image_read_only = false;
this->data.image_write_only = false;
this->data.image_coherent = false;
@@ -1842,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params)
ir_function::ir_function(const char *name)
: ir_instruction(ir_type_function)
{
+ this->subroutine_index = -1;
this->name = ralloc_strdup(this, name);
}
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index d59dee1e369..e1109eec1d3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1171,6 +1171,8 @@ public:
*/
int num_subroutine_types;
const struct glsl_type **subroutine_types;
+
+ int subroutine_index;
};
inline const char *ir_function_signature::function_name() const
@@ -1965,6 +1967,7 @@ enum ir_texture_opcode {
ir_tg4, /**< Texture gather */
ir_query_levels, /**< Texture levels query */
ir_texture_samples, /**< Texture samples query */
+ ir_samples_identical, /**< Query whether all samples are definitely identical. */
};
@@ -1991,6 +1994,7 @@ enum ir_texture_opcode {
* (lod <type> <sampler> <coordinate>)
* (tg4 <type> <sampler> <coordinate> <offset> <component>)
* (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
*/
class ir_texture : public ir_rvalue {
public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index d6b06eeec87..2aef4fcb4ac 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
@@ -269,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const
ir_function *copy = new(mem_ctx) ir_function(this->name);
copy->is_subroutine = this->is_subroutine;
+ copy->subroutine_index = this->subroutine_index;
copy->num_subroutine_types = this->num_subroutine_types;
copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
for (int i = 0; i < copy->num_subroutine_types; i++)
diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index 5f0785e0ece..b86f4ea16bb 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
return false;
for (unsigned i = 0; i < type->components(); i++) {
- if (value.u[i] != other->value.u[i])
- return false;
+ if (type->base_type == GLSL_TYPE_DOUBLE) {
+ if (value.d[i] != other->value.d[i])
+ return false;
+ } else {
+ if (value.u[i] != other->value.u[i])
+ return false;
+ }
}
return true;
@@ -152,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
if (!lod_info.bias->equals(other->lod_info.bias, ignore))
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 6495cc4581d..213992af28c 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
s = this->lod_info.bias->accept(v);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 42b03fdea52..fd7bc2eea98 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir)
{
fprintf(f, "(%s ", ir->opcode_string());
+ if (ir->op == ir_samples_identical) {
+ ir->sampler->accept(this);
+ fprintf(f, " ");
+ ir->coordinate->accept(this);
+ fprintf(f, ")");
+ return;
+ }
+
print_type(f, ir->type);
fprintf(f, " ");
@@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir)
case ir_tg4:
ir->lod_info.component->accept(this);
break;
+ case ir_samples_identical:
+ unreachable(!"ir_samples_identical was already handled");
};
fprintf(f, ")");
}
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index a6966f546bc..6486838b8b8 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
handle_rvalue(&ir->lod_info.bias);
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 7e77a675db1..c0b4b3e820c 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -766,7 +766,7 @@ public:
gl_shader_stage consumer_stage);
~varying_matches();
void record(ir_variable *producer_var, ir_variable *consumer_var);
- unsigned assign_locations();
+ unsigned assign_locations(uint64_t reserved_slots);
void store_locations() const;
private:
@@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
* passed to varying_matches::record().
*/
unsigned
-varying_matches::assign_locations()
+varying_matches::assign_locations(uint64_t reserved_slots)
{
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
@@ -1013,6 +1013,10 @@ varying_matches::assign_locations()
!= this->matches[i].packing_class) {
*location = ALIGN(*location, 4);
}
+ while ((*location < MAX_VARYING * 4u) &&
+ (reserved_slots & (UINT64_C(1) << *location / 4u))) {
+ *location = ALIGN(*location + 1, 4);
+ }
this->matches[i].generic_location = *location;
@@ -1376,6 +1380,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
}
/**
+ * Generate a bitfield map of the explicit locations for shader varyings.
+ *
+ * In theory a 32-bit value would be enough, but a 64-bit value is future-proof.
+ */
+uint64_t
+reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
+{
+ assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+ assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */
+
+ uint64_t slots = 0;
+ int var_slot;
+
+ if (!stage)
+ return slots;
+
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location)
+ continue;
+
+ var_slot = var->data.location - VARYING_SLOT_VAR0;
+ if (var_slot >= 0 && var_slot < MAX_VARYING)
+ slots |= UINT64_C(1) << var_slot;
+ }
+
+ return slots;
+}
+
+
+/**
* Assign locations for all variables that are produced in one pipeline stage
* (the "producer") and consumed in the next stage (the "consumer").
*
@@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx,
matches.record(matched_candidate->toplevel_var, NULL);
}
- const unsigned slots_used = matches.assign_locations();
+ const uint64_t reserved_slots =
+ reserved_varying_slot(producer, ir_var_shader_out) |
+ reserved_varying_slot(consumer, ir_var_shader_in);
+
+ const unsigned slots_used = matches.assign_locations(reserved_slots);
matches.store_locations();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index db00f8febc6..331d9a28007 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
sh->SubroutineFunctions[sh->NumSubroutineFunctions].types =
ralloc_array(sh, const struct glsl_type *,
fn->num_subroutine_types);
+
+ /* From Section 4.4.4(Subroutine Function Layout Qualifiers) of the
+ * GLSL 4.5 spec:
+ *
+ * "Each subroutine with an index qualifier in the shader must be
+ * given a unique index, otherwise a compile or link error will be
+ * generated."
+ */
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ if (sh->SubroutineFunctions[j].index != -1 &&
+ sh->SubroutineFunctions[j].index == fn->subroutine_index) {
+ linker_error(prog, "each subroutine index qualifier in the "
+ "shader must be unique\n");
+ return;
+ }
+ }
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].index =
+ fn->subroutine_index;
+
for (int j = 0; j < fn->num_subroutine_types; j++)
sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j];
sh->NumSubroutineFunctions++;
}
+
+ /* Assign an index to subroutines without an explicit index. */
+ int index = 0;
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ while (sh->SubroutineFunctions[j].index == -1) {
+ for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) {
+ if (sh->SubroutineFunctions[k].index == index)
+ break;
+ else if (k == sh->NumSubroutineFunctions - 1)
+ sh->SubroutineFunctions[j].index = index;
+ }
+ index++;
+ }
+ }
}
}
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index d8df3544f10..a26300d1d26 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -31,6 +31,7 @@
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
+#include "main/imports.h"
/*
* pass to lower GLSL IR to NIR
@@ -147,16 +148,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
nir_lower_outputs_to_temporaries(shader);
- /* TODO: Use _mesa_fls instead */
- unsigned num_textures = 0;
- for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
- if (sh->Program->SamplersUsed & (1 << i))
- num_textures = i;
-
shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
shader->info.label = ralloc_strdup(shader, shader_prog->Label);
- shader->info.num_textures = num_textures;
+ shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed);
shader->info.num_ubos = sh->NumUniformBlocks;
shader->info.num_abos = shader_prog->NumAtomicBuffers;
shader->info.num_ssbos = sh->NumShaderStorageBlocks;
@@ -174,6 +169,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader_prog->TransformFeedback.NumVarying > 0;
switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+ break;
+
case MESA_SHADER_GEOMETRY:
shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
shader->info.gs.output_primitive = sh->Geom.OutputType;
@@ -244,6 +243,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
unsigned total_elems = ir->type->components();
unsigned i;
+
+ ret->num_elements = 0;
switch (ir->type->base_type) {
case GLSL_TYPE_UINT:
for (i = 0; i < total_elems; i++)
@@ -268,6 +269,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
case GLSL_TYPE_STRUCT:
ret->elements = ralloc_array(mem_ctx, nir_constant *,
ir->type->length);
+ ret->num_elements = ir->type->length;
+
i = 0;
foreach_in_list(ir_constant, field, &ir->components) {
ret->elements[i] = constant_copy(field, mem_ctx);
@@ -278,6 +281,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
case GLSL_TYPE_ARRAY:
ret->elements = ralloc_array(mem_ctx, nir_constant *,
ir->type->length);
+ ret->num_elements = ir->type->length;
for (i = 0; i < ir->type->length; i++)
ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
@@ -297,15 +301,6 @@ nir_visitor::visit(ir_variable *ir)
var->type = ir->type;
var->name = ralloc_strdup(var, ir->name);
- if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) {
- unsigned size = ir->get_interface_type()->length;
- var->max_ifc_array_access = ralloc_array(var, unsigned, size);
- memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(),
- size * sizeof(unsigned));
- } else {
- var->max_ifc_array_access = NULL;
- }
-
var->data.read_only = ir->data.read_only;
var->data.centroid = ir->data.centroid;
var->data.sample = ir->data.sample;
@@ -1543,9 +1538,9 @@ nir_visitor::visit(ir_expression *ir)
result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
: nir_for(&b, srcs[0], srcs[1]);
break;
- case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
- result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
- : nir_for(&b, srcs[0], srcs[1]);
+ case ir_binop_logic_xor:
+ result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+ : nir_fxor(&b, srcs[0], srcs[1]);
break;
case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
case ir_binop_rshift:
@@ -1808,6 +1803,11 @@ nir_visitor::visit(ir_texture *ir)
num_srcs = 0;
break;
+ case ir_samples_identical:
+ op = nir_texop_samples_identical;
+ num_srcs = 1; /* coordinate */
+ break;
+
default:
unreachable("not reached");
}
@@ -1835,8 +1835,9 @@ nir_visitor::visit(ir_texture *ir)
case GLSL_TYPE_INT:
instr->dest_type = nir_type_int;
break;
+ case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT:
- instr->dest_type = nir_type_unsigned;
+ instr->dest_type = nir_type_uint;
break;
default:
unreachable("not reached");
diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 3e9d38f7707..64b5c0cb106 100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -130,6 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].image_coherent = fields[i].image_coherent;
this->fields.structure[i].image_volatile = fields[i].image_volatile;
this->fields.structure[i].image_restrict = fields[i].image_restrict;
+ this->fields.structure[i].precision = fields[i].precision;
}
mtx_unlock(&glsl_type::mutex);
diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index 14c2aa49f85..1aafa5cd547 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -858,7 +858,7 @@ struct glsl_struct_field {
/**
* Precision qualifier
*/
- unsigned precision;
+ unsigned precision:2;
/**
* Image qualifiers, applicable to buffer variables defined in shader
@@ -873,7 +873,8 @@ struct glsl_struct_field {
#ifdef __cplusplus
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
- sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0)
+ sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+ precision(GLSL_PRECISION_NONE)
{
/* empty */
}
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 3157ff82d99..79df6d3df94 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -107,6 +107,10 @@ void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
switch (var->data.mode) {
+ case nir_var_all:
+ assert(!"invalid mode");
+ break;
+
case nir_var_local:
assert(!"nir_shader_add_variable cannot be used for local variables");
break;
@@ -312,6 +316,14 @@ nir_block_create(nir_shader *shader)
block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
+ /* XXX maybe it would be worth it to defer allocation? This
+ * way it doesn't get allocated for shader ref's that never run
+ * nir_calc_dominance? For example, state-tracker creates an
+ * initial IR, clones that, runs appropriate lowering pass, passes
+ * to driver which does common lowering/opt, and then stores ref
+ * which is later used to do state-specific lowering and further
+ * opt. Do any of the references not need dominance metadata?
+ */
block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
@@ -1306,21 +1318,62 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
assert(!new_src.is_ssa || def != new_src.ssa);
- nir_foreach_use_safe(def, use_src) {
- nir_instr *src_parent_instr = use_src->parent_instr;
- list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, src_parent_instr);
- src_add_all_uses(use_src, src_parent_instr, NULL);
- }
+ nir_foreach_use_safe(def, use_src)
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+}
+
+static bool
+is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+{
+ assert(start->block == end->block);
+
+ if (between->block != start->block)
+ return false;
+
+ /* Search backwards looking for "between" */
+ while (start != end) {
+ if (between == end)
+ return true;
- nir_foreach_if_use_safe(def, use_src) {
- nir_if *src_parent_if = use_src->parent_if;
- list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, src_parent_if);
- src_add_all_uses(use_src, NULL, src_parent_if);
+ end = nir_instr_prev(end);
+ assert(end);
}
+
+ return false;
}
+/* Replaces all uses of the given SSA def with the given source but only if
+ * the use comes after the after_me instruction. This can be useful if you
+ * are emitting code to fix up the result of some instruction: you can freely
+ * use the result in that code and then call rewrite_uses_after and pass the
+ * last fixup instruction as after_me and it will replace all of the uses you
+ * want without touching the fixup code.
+ *
+ * This function assumes that after_me is in the same block as
+ * def->parent_instr and that after_me comes after def->parent_instr.
+ */
+void
+nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me)
+{
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src) {
+ assert(use_src->parent_instr != def->parent_instr);
+ /* Since def already dominates all of its uses, the only way a use can
+ * not be dominated by after_me is if it is between def and after_me in
+ * the instruction list.
+ */
+ if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+ }
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+}
static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
bool reverse, void *state);
@@ -1571,6 +1624,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_tess_level_inner;
case SYSTEM_VALUE_VERTICES_IN:
return nir_intrinsic_load_patch_vertices_in;
+ case SYSTEM_VALUE_HELPER_INVOCATION:
+ return nir_intrinsic_load_helper_invocation;
default:
unreachable("system value does not directly correspond to intrinsic");
}
@@ -1614,6 +1669,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_TESS_LEVEL_INNER;
case nir_intrinsic_load_patch_vertices_in:
return SYSTEM_VALUE_VERTICES_IN;
+ case nir_intrinsic_load_helper_invocation:
+ return SYSTEM_VALUE_HELPER_INVOCATION;
default:
unreachable("intrinsic doesn't produce a system value");
}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index df0e6f1f54a..b7374e17407 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -82,6 +82,7 @@ typedef struct {
} nir_state_slot;
typedef enum {
+ nir_var_all = -1,
nir_var_shader_in,
nir_var_shader_out,
nir_var_global,
@@ -111,6 +112,11 @@ typedef struct nir_constant {
*/
union nir_constant_data value;
+ /* we could get this from the var->type but makes clone *much* easier to
+ * not have to care about the type.
+ */
+ unsigned num_elements;
+
/* Array elements / Structure Fields */
struct nir_constant **elements;
} nir_constant;
@@ -147,19 +153,6 @@ typedef struct {
*/
char *name;
- /**
- * For variables which satisfy the is_interface_instance() predicate, this
- * points to an array of integers such that if the ith member of the
- * interface block is an array, max_ifc_array_access[i] is the maximum
- * array element of that member that has been accessed. If the ith member
- * of the interface block is not an array, max_ifc_array_access[i] is
- * unused.
- *
- * For variables whose type is not an interface block, this pointer is
- * NULL.
- */
- unsigned *max_ifc_array_access;
-
struct nir_variable_data {
/**
@@ -654,7 +647,7 @@ typedef enum {
nir_type_invalid = 0, /* Not a valid type */
nir_type_float,
nir_type_int,
- nir_type_unsigned,
+ nir_type_uint,
nir_type_bool
} nir_alu_type;
@@ -977,6 +970,9 @@ typedef enum {
nir_texop_tg4, /**< Texture gather */
nir_texop_query_levels, /**< Texture levels query */
nir_texop_texture_samples, /**< Texture samples query */
+ nir_texop_samples_identical, /**< Query whether all samples are definitely
+ * identical.
+ */
} nir_texop;
typedef struct {
@@ -1069,6 +1065,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
case nir_texop_texture_samples:
case nir_texop_query_levels:
+ case nir_texop_samples_identical:
return 1;
default:
@@ -1079,6 +1076,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
}
}
+/* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ */
+static inline bool
+nir_tex_instr_is_query(nir_tex_instr *instr)
+{
+ switch (instr->op) {
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ return true;
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd:
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_tg4:
+ case nir_texop_samples_identical:
+ return false;
+ default:
+ unreachable("Invalid texture opcode");
+ }
+}
+
static inline unsigned
nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
{
@@ -1353,6 +1375,7 @@ typedef enum {
nir_metadata_block_index = 0x1,
nir_metadata_dominance = 0x2,
nir_metadata_live_ssa_defs = 0x4,
+ nir_metadata_not_properly_reset = 0x8,
} nir_metadata;
typedef struct {
@@ -1578,6 +1601,11 @@ typedef struct nir_shader_info {
struct {
unsigned local_size[3];
} cs;
+
+ struct {
+ /** The number of vertices in the TCS output patch. */
+ unsigned vertices_out;
+ } tcs;
};
} nir_shader_info;
@@ -1910,6 +1938,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name);
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me);
/* visits basic blocks in source-code order */
typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
@@ -1937,10 +1967,16 @@ void nir_index_blocks(nir_function_impl *impl);
void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_instr(const nir_instr *instr, FILE *fp);
+nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
+void nir_metadata_set_validation_flag(nir_shader *shader);
+void nir_metadata_check_validation_flag(nir_shader *shader);
#else
static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
#endif /* DEBUG */
void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -2032,9 +2068,22 @@ typedef struct nir_lower_tex_options {
unsigned saturate_s;
unsigned saturate_t;
unsigned saturate_r;
+
+ /* Bitmask of samplers that need swizzling.
+ *
+ * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+ * swizzles[sampler_index] is applied to the result of the texturing
+ * operation.
+ */
+ unsigned swizzle_result;
+
+ /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles
+ * while 4 and 5 represent 0 and 1 respectively.
+ */
+ uint8_t swizzles[32][4];
} nir_lower_tex_options;
-void nir_lower_tex(nir_shader *shader,
+bool nir_lower_tex(nir_shader *shader,
const nir_lower_tex_options *options);
void nir_lower_idiv(nir_shader *shader);
diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 205aa067b0b..fe41c74b608 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -256,7 +256,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
{
nir_alu_src alu_src = { NIR_SRC_INIT };
alu_src.src = nir_src_for_ssa(src);
- for (int i = 0; i < 4; i++)
+ for (unsigned i = 0; i < num_components; i++)
alu_src.swizzle[i] = swiz[i];
return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
@@ -290,6 +290,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
/**
* Turns a nir_src into a nir_ssa_def * so it can be passed to
* nir_build_alu()-based builder calls.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
*/
static inline nir_ssa_def *
nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
@@ -305,6 +307,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
return nir_imov_alu(build, alu, num_components);
}
+/**
+ * Similar to nir_ssa_for_src(), but for alu src's, respecting the
+ * nir_alu_src's swizzle.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+{
+ static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+ nir_alu_src *src = &instr->src[srcn];
+ unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+ if (src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
+ !src->abs && !src->negate &&
+ (memcmp(src->swizzle, trivial_swizzle, num_components) == 0))
+ return src->src.ssa;
+
+ return nir_imov_alu(build, *src, num_components);
+}
+
static inline nir_ssa_def *
nir_load_var(nir_builder *build, nir_variable *var)
{
diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
new file mode 100644
index 00000000000..68b72ef5381
--- /dev/null
+++ b/src/glsl/nir/nir_clone.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_control_flow_private.h"
+
+/* Secret Decoder Ring:
+ * clone_foo():
+ * Allocate and clone a foo.
+ * __clone_foo():
+ * Clone body of foo (ie. parent class, embedded struct, etc)
+ */
+
+typedef struct {
+ /* maps orig ptr -> cloned ptr: */
+ struct hash_table *ptr_table;
+
+ /* List of phi sources. */
+ struct list_head phi_srcs;
+
+ /* new shader object, used as memctx for just about everything else: */
+ nir_shader *ns;
+} clone_state;
+
+static void
+init_clone_state(clone_state *state)
+{
+ state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ list_inithead(&state->phi_srcs);
+}
+
+static void
+free_clone_state(clone_state *state)
+{
+ _mesa_hash_table_destroy(state->ptr_table, NULL);
+}
+
+static void *
+lookup_ptr(clone_state *state, const void *ptr)
+{
+ struct hash_entry *entry;
+
+ if (!ptr)
+ return NULL;
+
+ entry = _mesa_hash_table_search(state->ptr_table, ptr);
+ assert(entry && "Failed to find pointer!");
+ if (!entry)
+ return NULL;
+
+ return entry->data;
+}
+
+static void
+store_ptr(clone_state *state, void *nptr, const void *ptr)
+{
+ _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
+}
+
+static nir_constant *
+clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
+{
+ nir_constant *nc = ralloc(nvar, nir_constant);
+
+ nc->value = c->value;
+ nc->num_elements = c->num_elements;
+ nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
+ for (unsigned i = 0; i < c->num_elements; i++) {
+ nc->elements[i] = clone_constant(state, c->elements[i], nvar);
+ }
+
+ return nc;
+}
+
+/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
+ * having to deal with locals and globals separately:
+ */
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+ nir_variable *nvar = rzalloc(state->ns, nir_variable);
+ store_ptr(state, nvar, var);
+
+ nvar->type = var->type;
+ nvar->name = ralloc_strdup(nvar, var->name);
+ nvar->data = var->data;
+ nvar->num_state_slots = var->num_state_slots;
+ nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
+ memcpy(nvar->state_slots, var->state_slots,
+ var->num_state_slots * sizeof(nir_state_slot));
+ if (var->constant_initializer) {
+ nvar->constant_initializer =
+ clone_constant(state, var->constant_initializer, nvar);
+ }
+ nvar->interface_type = var->interface_type;
+
+ return nvar;
+}
+
+/* clone list of nir_variable: */
+static void
+clone_var_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_variable, var, node, list) {
+ nir_variable *nvar = clone_variable(state, var);
+ exec_list_push_tail(dst, &nvar->node);
+ }
+}
+
+/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
+ * to avoid having to deal with locals and globals separately:
+ */
+static nir_register *
+clone_register(clone_state *state, const nir_register *reg)
+{
+ nir_register *nreg = rzalloc(state->ns, nir_register);
+ store_ptr(state, nreg, reg);
+
+ nreg->num_components = reg->num_components;
+ nreg->num_array_elems = reg->num_array_elems;
+ nreg->index = reg->index;
+ nreg->name = ralloc_strdup(nreg, reg->name);
+ nreg->is_global = reg->is_global;
+ nreg->is_packed = reg->is_packed;
+
+ /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
+ list_inithead(&nreg->uses);
+ list_inithead(&nreg->defs);
+ list_inithead(&nreg->if_uses);
+
+ return nreg;
+}
+
+/* clone list of nir_register: */
+static void
+clone_reg_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_register, reg, node, list) {
+ nir_register *nreg = clone_register(state, reg);
+ exec_list_push_tail(dst, &nreg->node);
+ }
+}
+
+static void
+__clone_src(clone_state *state, void *ninstr_or_if,
+ nir_src *nsrc, const nir_src *src)
+{
+ nsrc->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
+ nsrc->ssa = lookup_ptr(state, src->ssa);
+ } else {
+ nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
+ if (src->reg.indirect) {
+ nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+ __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+ }
+ nsrc->reg.base_offset = src->reg.base_offset;
+ }
+}
+
+static void
+__clone_dst(clone_state *state, nir_instr *ninstr,
+ nir_dest *ndst, const nir_dest *dst)
+{
+ ndst->is_ssa = dst->is_ssa;
+ if (dst->is_ssa) {
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+ store_ptr(state, &ndst->ssa, &dst->ssa);
+ } else {
+ ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
+ if (dst->reg.indirect) {
+ ndst->reg.indirect = ralloc(ninstr, nir_src);
+ __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+ }
+ ndst->reg.base_offset = dst->reg.base_offset;
+ }
+}
+
+static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+ nir_instr *ninstr, nir_deref *parent);
+
+static nir_deref_var *
+clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+ nir_instr *ninstr)
+{
+ nir_variable *nvar = lookup_ptr(state, dvar->var);
+ nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar);
+
+ if (dvar->deref.child)
+ ndvar->deref.child = clone_deref(state, dvar->deref.child,
+ ninstr, &ndvar->deref);
+
+ return ndvar;
+}
+
+static nir_deref_array *
+clone_deref_array(clone_state *state, const nir_deref_array *darr,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ nir_deref_array *ndarr = nir_deref_array_create(parent);
+
+ ndarr->deref.type = darr->deref.type;
+ if (darr->deref.child)
+ ndarr->deref.child = clone_deref(state, darr->deref.child,
+ ninstr, &ndarr->deref);
+
+ ndarr->deref_array_type = darr->deref_array_type;
+ ndarr->base_offset = darr->base_offset;
+ if (ndarr->deref_array_type == nir_deref_array_type_indirect)
+ __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect);
+
+ return ndarr;
+}
+
+static nir_deref_struct *
+clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index);
+
+ ndstr->deref.type = dstr->deref.type;
+ if (dstr->deref.child)
+ ndstr->deref.child = clone_deref(state, dstr->deref.child,
+ ninstr, &ndstr->deref);
+
+ return ndstr;
+}
+
+static nir_deref *
+clone_deref(clone_state *state, const nir_deref *dref,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ switch (dref->deref_type) {
+ case nir_deref_type_array:
+ return &clone_deref_array(state, nir_deref_as_array(dref),
+ ninstr, parent)->deref;
+ case nir_deref_type_struct:
+ return &clone_deref_struct(state, nir_deref_as_struct(dref),
+ ninstr, parent)->deref;
+ default:
+ unreachable("bad deref type");
+ return NULL;
+ }
+}
+
+static nir_alu_instr *
+clone_alu(clone_state *state, const nir_alu_instr *alu)
+{
+ nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+
+ __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
+ nalu->dest.saturate = alu->dest.saturate;
+ nalu->dest.write_mask = alu->dest.write_mask;
+
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
+ nalu->src[i].negate = alu->src[i].negate;
+ nalu->src[i].abs = alu->src[i].abs;
+ memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+ sizeof(nalu->src[i].swizzle));
+ }
+
+ return nalu;
+}
+
+static nir_intrinsic_instr *
+clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+{
+ nir_intrinsic_instr *nitr =
+ nir_intrinsic_instr_create(state->ns, itr->intrinsic);
+
+ unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
+ unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
+
+ if (nir_intrinsic_infos[itr->intrinsic].has_dest)
+ __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
+
+ nitr->num_components = itr->num_components;
+ memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ nitr->variables[i] = clone_deref_var(state, itr->variables[i],
+ &nitr->instr);
+ }
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
+
+ return nitr;
+}
+
+static nir_load_const_instr *
+clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+{
+ nir_load_const_instr *nlc =
+ nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+ memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
+
+ store_ptr(state, &nlc->def, &lc->def);
+
+ return nlc;
+}
+
+static nir_ssa_undef_instr *
+clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+{
+ nir_ssa_undef_instr *nsa =
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+
+ store_ptr(state, &nsa->def, &sa->def);
+
+ return nsa;
+}
+
+static nir_tex_instr *
+clone_tex(clone_state *state, const nir_tex_instr *tex)
+{
+ nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+ ntex->sampler_dim = tex->sampler_dim;
+ ntex->dest_type = tex->dest_type;
+ ntex->op = tex->op;
+ __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
+ for (unsigned i = 0; i < ntex->num_srcs; i++) {
+ ntex->src[i].src_type = tex->src[i].src_type;
+ __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
+ }
+ ntex->coord_components = tex->coord_components;
+ ntex->is_array = tex->is_array;
+ ntex->is_shadow = tex->is_shadow;
+ ntex->is_new_style_shadow = tex->is_new_style_shadow;
+ memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
+ ntex->component = tex->component;
+ ntex->texture_index = tex->texture_index;
+ ntex->texture_array_size = tex->texture_array_size;
+ if (tex->texture)
+ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr);
+ ntex->sampler_index = tex->sampler_index;
+ if (tex->sampler)
+ ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
+
+ return ntex;
+}
+
+static nir_phi_instr *
+clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+{
+ nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+ __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+ /* Cloning a phi node is a bit different from other instructions. The
+ * sources of phi instructions are the only time where we can use an SSA
+ * def before it is defined. In order to handle this, we just copy over
+ * the sources from the old phi instruction directly and then fix them up
+ * in a second pass once all the instructions in the function have been
+ * properly cloned.
+ *
+ * In order to ensure that the copied sources (which are the same as the
+ * old phi instruction's sources for now) don't get inserted into the old
+ * shader's use-def lists, we have to add the phi instruction *before* we
+ * set up its sources.
+ */
+ nir_instr_insert_after_block(nblk, &nphi->instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+ nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+ /* Just copy the old source for now. */
+ memcpy(nsrc, src, sizeof(*src));
+
+ /* Since we're not letting nir_insert_instr handle use/def stuff for us,
+ * we have to set the parent_instr manually. It doesn't really matter
+ * when we do it, so we might as well do it here.
+ */
+ nsrc->src.parent_instr = &nphi->instr;
+
+ /* Stash it in the list of phi sources. We'll walk this list and fix up
+ * sources at the very end of clone_function_impl.
+ */
+ list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+ exec_list_push_tail(&nphi->srcs, &nsrc->node);
+ }
+
+ return nphi;
+}
+
+/* Clone a jump instruction.  Jumps carry no sources or dests, so only
+ * the jump type needs to be copied.
+ */
+static nir_jump_instr *
+clone_jump(clone_state *state, const nir_jump_instr *jmp)
+{
+ nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
+
+ return njmp;
+}
+
+/* Clone a call instruction.  The callee overload must already be in the
+ * remap table (overloads are cloned before any function_impls — see
+ * nir_shader_clone), so we look it up rather than cloning it here.
+ */
+static nir_call_instr *
+clone_call(clone_state *state, const nir_call_instr *call)
+{
+ nir_function_overload *ncallee = lookup_ptr(state, call->callee);
+ nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+ for (unsigned i = 0; i < ncall->num_params; i++)
+ ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+ ncall->return_deref = clone_deref_var(state, call->return_deref,
+ &ncall->instr);
+
+ return ncall;
+}
+
+/* Dispatch cloning by instruction type.  Phis are handled separately by
+ * clone_phi (they need deferred source fixup), and parallel copies only
+ * exist transiently during out-of-SSA, so both are unreachable here.
+ */
+static nir_instr *
+clone_instr(clone_state *state, const nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+ case nir_instr_type_intrinsic:
+ return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+ case nir_instr_type_load_const:
+ return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+ case nir_instr_type_ssa_undef:
+ return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+ case nir_instr_type_tex:
+ return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+ case nir_instr_type_phi:
+ unreachable("Cannot clone phis with clone_instr");
+ case nir_instr_type_jump:
+ return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+ case nir_instr_type_call:
+ return &clone_call(state, nir_instr_as_call(instr))->instr;
+ case nir_instr_type_parallel_copy:
+ unreachable("Cannot clone parallel copies");
+ default:
+ unreachable("bad instr type");
+ return NULL;
+ }
+}
+
+/* Clone all instructions of blk into the (already existing, empty) block
+ * at the tail of cf_list, and register the block mapping for later phi
+ * predecessor fixup.
+ */
+static nir_block *
+clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+{
+ /* Don't actually create a new block. Just use the one from the tail of
+ * the list. NIR guarantees that the tail of the list is a block and that
+ * no two blocks are side-by-side in the IR; It should be empty.
+ */
+ nir_block *nblk =
+ exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+ assert(nblk->cf_node.type == nir_cf_node_block);
+ assert(exec_list_is_empty(&nblk->instr_list));
+
+ /* We need this for phi sources */
+ store_ptr(state, nblk, blk);
+
+ nir_foreach_instr(blk, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ /* Phi instructions are a bit of a special case when cloning because
+ * we don't want inserting the instruction to automatically handle
+ * use/defs for us. Instead, we need to wait until all the
+ * blocks/instructions are in so that we can set their sources up.
+ */
+ clone_phi(state, nir_instr_as_phi(instr), nblk);
+ } else {
+ nir_instr *ninstr = clone_instr(state, instr);
+ nir_instr_insert_after_block(nblk, ninstr);
+ }
+ }
+
+ return nblk;
+}
+
+/* Forward declaration: clone_if/clone_loop and clone_cf_list are
+ * mutually recursive over the CF tree.
+ */
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list);
+
+/* Clone an if-statement: condition source, then both branches
+ * (recursively via clone_cf_list), inserted at the end of cf_list.
+ */
+static nir_if *
+clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+{
+ nir_if *ni = nir_if_create(state->ns);
+
+ __clone_src(state, ni, &ni->condition, &i->condition);
+
+ nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+ clone_cf_list(state, &ni->then_list, &i->then_list);
+ clone_cf_list(state, &ni->else_list, &i->else_list);
+
+ return ni;
+}
+
+/* Clone a loop node and, recursively, its body; inserted at the end of
+ * cf_list.
+ */
+static nir_loop *
+clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+{
+ nir_loop *nloop = nir_loop_create(state->ns);
+
+ nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+ clone_cf_list(state, &nloop->body, &loop->body);
+
+ return nloop;
+}
+
+/* Clone a list of nir_cf_node (the body of a function, if-branch, or
+ * loop), dispatching on node type.
+ */
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ foreach_list_typed(nir_cf_node, cf, node, list) {
+ switch (cf->type) {
+ case nir_cf_node_block:
+ clone_block(state, dst, nir_cf_node_as_block(cf));
+ break;
+ case nir_cf_node_if:
+ clone_if(state, dst, nir_cf_node_as_if(cf));
+ break;
+ case nir_cf_node_loop:
+ clone_loop(state, dst, nir_cf_node_as_loop(cf));
+ break;
+ default:
+ unreachable("bad cf type");
+ }
+ }
+}
+
+/* Clone a function implementation: locals, registers, params, body, then
+ * the deferred phi-source fixup pass.  All previously-cloned pointers
+ * (params, return var, blocks, SSA defs) are resolved via the remap
+ * table.
+ */
+static nir_function_impl *
+clone_function_impl(clone_state *state, const nir_function_impl *fi,
+ nir_function_overload *nfo)
+{
+ nir_function_impl *nfi = nir_function_impl_create(nfo);
+
+ clone_var_list(state, &nfi->locals, &fi->locals);
+ clone_reg_list(state, &nfi->registers, &fi->registers);
+ nfi->reg_alloc = fi->reg_alloc;
+
+ nfi->num_params = fi->num_params;
+ nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+ for (unsigned i = 0; i < fi->num_params; i++) {
+ nfi->params[i] = lookup_ptr(state, fi->params[i]);
+ }
+ nfi->return_var = lookup_ptr(state, fi->return_var);
+
+ assert(list_empty(&state->phi_srcs));
+
+ clone_cf_list(state, &nfi->body, &fi->body);
+
+ /* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up. Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it. Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
+ src->pred = lookup_ptr(state, src->pred);
+ assert(src->src.is_ssa);
+ src->src.ssa = lookup_ptr(state, src->src.ssa);
+
+ /* Remove from this list and place in the uses of the SSA def */
+ list_del(&src->src.use_link);
+ list_addtail(&src->src.use_link, &src->src.ssa->uses);
+ }
+ assert(list_empty(&state->phi_srcs));
+
+ /* All metadata is invalidated in the cloning process */
+ nfi->valid_metadata = 0;
+
+ return nfi;
+}
+
+/* Clone a function overload's signature only (params, return type) and
+ * register it in the remap table; the impl is cloned in a second pass.
+ */
+static nir_function_overload *
+clone_function_overload(clone_state *state, const nir_function_overload *fo,
+ nir_function *nfxn)
+{
+ nir_function_overload *nfo = nir_function_overload_create(nfxn);
+
+ /* Needed for call instructions */
+ store_ptr(state, nfo, fo);
+
+ nfo->num_params = fo->num_params;
+ nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
+ memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
+
+ nfo->return_type = fo->return_type;
+
+ /* At first glance, it looks like we should clone the function_impl here.
+ * However, call instructions need to be able to reference at least the
+ * overload and those will get processed as we clone the function_impl's.
+ * We stop here and do function_impls as a second pass.
+ */
+
+ return nfo;
+}
+
+/* Clone a function shell: its name and all of its overloads (signatures
+ * only — impls are cloned later by nir_shader_clone).
+ */
+static nir_function *
+clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+{
+ assert(ns == state->ns);
+ nir_function *nfxn = nir_function_create(ns, fxn->name);
+
+ foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list)
+ clone_function_overload(state, fo, nfxn);
+
+ return nfxn;
+}
+
+/* Deep-copy an entire nir_shader into a new ralloc context.  Cloning is
+ * done in dependency order: variables first, then function shells and
+ * overloads, then function impls (so calls can resolve their callees),
+ * then global registers and shader info.
+ */
+nir_shader *
+nir_shader_clone(void *mem_ctx, const nir_shader *s)
+{
+ clone_state state;
+ init_clone_state(&state);
+
+ nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+ state.ns = ns;
+
+ clone_var_list(&state, &ns->uniforms, &s->uniforms);
+ clone_var_list(&state, &ns->inputs, &s->inputs);
+ clone_var_list(&state, &ns->outputs, &s->outputs);
+ clone_var_list(&state, &ns->globals, &s->globals);
+ clone_var_list(&state, &ns->system_values, &s->system_values);
+
+ /* Go through and clone functions and overloads */
+ foreach_list_typed(nir_function, fxn, node, &s->functions)
+ clone_function(&state, fxn, ns);
+
+ /* Only after all overloads are cloned can we clone the actual function
+ * implementations. This is because nir_call_instr's need to reference the
+ * overloads of other functions and we don't know what order the functions
+ * will have in the list.
+ */
+ nir_foreach_overload(s, fo) {
+ nir_function_overload *nfo = lookup_ptr(&state, fo);
+ clone_function_impl(&state, fo->impl, nfo);
+ }
+
+ clone_reg_list(&state, &ns->registers, &s->registers);
+ ns->reg_alloc = s->reg_alloc;
+
+ /* info is copied by value; the name/label strings must be re-duplicated
+ * onto the new shader's ralloc context so they outlive the old shader.
+ */
+ ns->info = s->info;
+ ns->info.name = ralloc_strdup(ns, ns->info.name);
+ if (ns->info.label)
+ ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+ ns->num_inputs = s->num_inputs;
+ ns->num_uniforms = s->num_uniforms;
+ ns->num_outputs = s->num_outputs;
+
+ free_clone_state(&state);
+
+ return ns;
+}
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index 2ba8554645d..b16ef503c92 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u)
}
/* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "unsigned", "bool"]:
+% for type in ["float", "int", "uint", "bool"]:
struct ${type}_vec {
${type} x;
${type} y;
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 0a134aff211..de30db61eea 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -244,6 +244,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0)
SYSTEM_VALUE(work_group_id, 3, 0)
SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
SYSTEM_VALUE(num_work_groups, 3, 0)
+SYSTEM_VALUE(helper_invocation, 1, 0)
/*
* The format of the indices depends on the type of the load. For uniforms,
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index 31ccfb2c02b..c58c7785b3f 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc,
if (output) {
exec_list_push_tail(&shader->outputs, &var->node);
+ shader->num_outputs++; /* TODO use type_size() */
}
else {
exec_list_push_tail(&shader->inputs, &var->node);
+ shader->num_inputs++; /* TODO use type_size() */
}
return var;
}
diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
index c961178c53a..f64b3eac8a0 100644
--- a/src/glsl/nir/nir_lower_idiv.c
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
bld->cursor = nir_before_instr(&alu->instr);
- numer = nir_ssa_for_src(bld, alu->src[0].src,
- nir_ssa_alu_instr_src_components(alu, 0));
- denom = nir_ssa_for_src(bld, alu->src[1].src,
- nir_ssa_alu_instr_src_components(alu, 1));
+ numer = nir_ssa_for_alu_src(bld, alu, 0);
+ denom = nir_ssa_for_alu_src(bld, alu, 1);
if (is_signed) {
af = nir_i2f(bld, numer);
@@ -96,7 +94,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
r = nir_imul(bld, q, b);
r = nir_isub(bld, a, r);
- r = nir_ige(bld, r, b);
+ r = nir_uge(bld, r, b);
r = nir_b2i(bld, r);
q = nir_iadd(bld, q, r);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 00a31458310..5683e69d865 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
- if (state->mode != -1 && state->mode != mode)
+ if (state->mode != nir_var_all && state->mode != mode)
continue;
if (mode != nir_var_shader_in &&
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 8aaa48ab568..93ebf8e78a9 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -41,6 +41,7 @@
typedef struct {
nir_builder b;
const nir_lower_tex_options *options;
+ bool progress;
} lower_tex_state;
static void
@@ -133,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
txs->op = nir_texop_txs;
txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
txs->sampler_index = tex->sampler_index;
+ txs->dest_type = nir_type_int;
/* only single src, the lod: */
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
@@ -213,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
}
}
+static nir_ssa_def *
+get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
+{
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+
+ if (swizzle_val == 4) {
+ v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
+ } else {
+ assert(swizzle_val == 5);
+ if (type == nir_type_float)
+ v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
+ else
+ v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
+ }
+
+ return nir_build_imm(b, 4, v);
+}
+
+static void
+swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
+{
+ assert(tex->dest.is_ssa);
+
+ b->cursor = nir_after_instr(&tex->instr);
+
+ nir_ssa_def *swizzled;
+ if (tex->op == nir_texop_tg4) {
+ if (swizzle[tex->component] < 4) {
+ /* This one's easy */
+ tex->component = swizzle[tex->component];
+ return;
+ } else {
+ swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
+ }
+ } else {
+ assert(nir_tex_instr_dest_size(tex) == 4);
+ if (swizzle[0] < 4 && swizzle[1] < 4 &&
+ swizzle[2] < 4 && swizzle[3] < 4) {
+ unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+ /* We have no 0's or 1's, just emit a swizzling MOV */
+ swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
+ } else {
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < 4; i++) {
+ if (swizzle[i] < 4) {
+ srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
+ } else {
+ srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
+ }
+ }
+ swizzled = nir_vec(b, srcs, 4);
+ }
+ }
+
+ nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
+ swizzled->parent_instr);
+}
+
static bool
nir_lower_tex_block(nir_block *block, void *void_state)
{
@@ -239,15 +301,28 @@ nir_lower_tex_block(nir_block *block, void *void_state)
/* If we are clamping any coords, we must lower projector first
* as clamping happens *after* projection:
*/
- if (lower_txp || sat_mask)
+ if (lower_txp || sat_mask) {
project_src(b, tex);
+ state->progress = true;
+ }
if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
- state->options->lower_rect)
+ state->options->lower_rect) {
lower_rect(b, tex);
+ state->progress = true;
+ }
- if (sat_mask)
+ if (sat_mask) {
saturate_src(b, tex, sat_mask);
+ state->progress = true;
+ }
+
+ if (((1 << tex->sampler_index) & state->options->swizzle_result) &&
+ !nir_tex_instr_is_query(tex) &&
+ !(tex->is_shadow && tex->is_new_style_shadow)) {
+ swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]);
+ state->progress = true;
+ }
}
return true;
@@ -264,13 +339,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
nir_metadata_dominance);
}
-void
+bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
lower_tex_state state;
state.options = options;
+ state.progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
nir_lower_tex_impl(overload->impl, &state);
}
+
+ return state.progress;
}
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
index db519bf513b..6995b9d6bc1 100644
--- a/src/glsl/nir/nir_lower_two_sided_color.c
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
exec_list_push_tail(&shader->inputs, &var->node);
+ shader->num_inputs++; /* TODO use type_size() */
+
return var;
}
diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
index 6de981f430f..d5324b35a78 100644
--- a/src/glsl/nir/nir_metadata.c
+++ b/src/glsl/nir/nir_metadata.c
@@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
{
impl->valid_metadata &= preserved;
}
+
+#ifdef DEBUG
+/**
+ * Make sure passes properly invalidate metadata (part 1).
+ *
+ * Call this before running a pass to set a bogus metadata flag, which will
+ * only be preserved if the pass forgets to call nir_metadata_preserve().
+ */
+void
+nir_metadata_set_validation_flag(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ overload->impl->valid_metadata |= nir_metadata_not_properly_reset;
+ }
+ }
+}
+
+/**
+ * Make sure passes properly invalidate metadata (part 2).
+ *
+ * Call this after a pass makes progress to verify that the bogus metadata set by
+ * the earlier function was properly thrown away. Note that passes may not call
+ * nir_metadata_preserve() if they don't actually make any changes at all.
+ */
+void
+nir_metadata_check_validation_flag(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ assert(!(overload->impl->valid_metadata &
+ nir_metadata_not_properly_reset));
+ }
+ }
+}
+#endif
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 3c0f1da94af..37d3dfc4588 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -91,7 +91,7 @@ class Opcode(object):
tfloat = "float"
tint = "int"
tbool = "bool"
-tunsigned = "unsigned"
+tuint = "uint"
commutative = "commutative "
associative = "associative "
@@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
@@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tint, tbool, "src0 != 0")
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
@@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f")
# Floating point pack and unpack operations.
def pack_2x16(fmt):
- unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt))
def pack_4x8(fmt):
- unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt))
def unpack_2x16(fmt):
- unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
""".replace("fmt", fmt))
def unpack_4x8(fmt):
- unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -248,22 +248,22 @@ unpack_2x16("half")
# Lowered floating point unpacking operations.
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
# Bit operations, part of ARB_gpu_shader5.
-unop("bitfield_reverse", tunsigned, """
+unop("bitfield_reverse", tuint, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
-unop("bit_count", tunsigned, """
+unop("bit_count", tuint, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1)
@@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
}
""")
-unop_convert("ufind_msb", tunsigned, tint, """
+unop_convert("ufind_msb", tuint, tint, """
dst = -1;
for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) {
@@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1")
binop("imul_high", tint, commutative,
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply
-binop("umul_high", tunsigned, commutative,
+binop("umul_high", tuint, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1")
binop("idiv", tint, "", "src0 / src1")
-binop("udiv", tunsigned, "", "src0 / src1")
+binop("udiv", tuint, "", "src0 / src1")
# returns a boolean representing the carry resulting from the addition of
# the two unsigned arguments.
-binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
+binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0")
# returns a boolean representing the borrow resulting from the subtraction
# of the two unsigned arguments.
-binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
+binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0")
binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
-binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
+binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
#
# Comparisons
@@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
binop_compare("ieq", tint, commutative, "src0 == src1")
binop_compare("ine", tint, commutative, "src0 != src1")
-binop_compare("ult", tunsigned, "", "src0 < src1")
-binop_compare("uge", tunsigned, "", "src0 >= src1")
+binop_compare("ult", tuint, "", "src0 < src1")
+binop_compare("uge", tuint, "", "src0 >= src1")
# integer-aware GLSL-style comparisons that compare floats and ints
@@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E
binop("ishl", tint, "", "src0 << src1")
binop("ishr", tint, "", "src0 >> src1")
-binop("ushr", tunsigned, "", "src0 >> src1")
+binop("ushr", tuint, "", "src0 >> src1")
# bitwise logic operators
#
@@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1")
# integers.
-binop("iand", tunsigned, commutative + associative, "src0 & src1")
-binop("ior", tunsigned, commutative + associative, "src0 | src1")
-binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
+binop("iand", tuint, commutative + associative, "src0 & src1")
+binop("ior", tuint, commutative + associative, "src0 | src1")
+binop("ixor", tuint, commutative + associative, "src0 ^ src1")
# floating point logic operators
@@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
-binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
-binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
# Saturated vector add for 4 8bit ints.
binop("usadd_4x8", tint, commutative + associative, """
@@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) {
binop("fpow", tfloat, "", "powf(src0, src1)")
-binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
-binop_convert("bfm", tunsigned, tint, "", """
+binop_convert("bfm", tuint, tint, "", """
int offset = src0, bits = src1;
if (offset < 0 || bits < 0 || offset + bits > 32)
dst = 0; /* undefined per the spec */
@@ -535,7 +535,7 @@ if (!isnormal(dst))
# Combines the first component of each input to make a 2-component vector.
-binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
+binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
dst.x = src0.x;
dst.y = src1.x;
""")
@@ -543,9 +543,9 @@ dst.y = src1.x;
def triop(name, ty, const_expr):
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
- opcode(name, output_size, tunsigned,
+ opcode(name, output_size, tuint,
[src1_size, src2_size, src3_size],
- [tunsigned, tunsigned, tunsigned], "", const_expr)
+ [tuint, tuint, tuint], "", const_expr)
triop("ffma", tfloat, "src0 * src1 + src2")
@@ -559,11 +559,11 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
-opcode("bcsel", 0, tunsigned, [0, 0, 0],
- [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
+opcode("bcsel", 0, tuint, [0, 0, 0],
+ [tbool, tuint, tuint], "", "src0 ? src1 : src2")
-triop("bfi", tunsigned, """
-unsigned mask = src0, insert = src1 & mask, base = src2;
+triop("bfi", tuint, """
+unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
dst = base;
} else {
@@ -572,12 +572,12 @@ if (mask == 0) {
tmp >>= 1;
insert <<= 1;
}
- dst = (base & ~mask) | insert;
+ dst = (base & ~mask) | (insert & mask);
}
""")
-opcode("ubitfield_extract", 0, tunsigned,
- [0, 1, 1], [tunsigned, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint,
+ [0, 1, 1], [tuint, tint, tint], "", """
unsigned base = src0;
int offset = src1.x, bits = src2.x;
if (bits == 0) {
@@ -611,13 +611,13 @@ dst.z = src2.x;
def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
src4_size, const_expr):
- opcode(name, output_size, tunsigned,
+ opcode(name, output_size, tuint,
[src1_size, src2_size, src3_size, src4_size],
- [tunsigned, tunsigned, tunsigned, tunsigned],
+ [tuint, tuint, tuint, tuint],
"", const_expr)
-opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
- [tunsigned, tunsigned, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
+ [tuint, tuint, tint, tint], "", """
unsigned base = src0, insert = src1;
int offset = src2.x, bits = src3.x;
if (bits == 0) {
diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 7d8bdd7f2ca..cfc8e331128 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr)
static bool is_vec(nir_alu_instr *instr)
{
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa)
return false;
+ /* we handle modifiers in a separate pass */
+ if (instr->src[i].abs || instr->src[i].negate)
+ return false;
+ }
+
return instr->op == nir_op_vec2 ||
instr->op == nir_op_vec3 ||
instr->op == nir_op_vec4;
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 2db209d434d..76bfc47c2a0 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
case nir_texop_texture_samples:
fprintf(fp, "texture_samples ");
break;
-
+ case nir_texop_samples_identical:
+ fprintf(fp, "samples_identical ");
+ break;
default:
unreachable("Invalid texture operation");
break;
@@ -985,6 +987,16 @@ nir_print_shader(nir_shader *shader, FILE *fp)
fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+ if (shader->info.name)
+ fprintf(fp, "name: %s\n", shader->info.name);
+
+ if (shader->info.label)
+ fprintf(fp, "label: %s\n", shader->info.label);
+
+ fprintf(fp, "inputs: %u\n", shader->num_inputs);
+ fprintf(fp, "outputs: %u\n", shader->num_outputs);
+ fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
+
nir_foreach_variable(var, &shader->uniforms) {
print_var_decl(var, &state);
}
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index bb154407914..56d7e8162f3 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
}
return true;
case nir_type_int:
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
for (unsigned i = 0; i < num_components; ++i) {
if (load->value.i[new_swizzle[i]] != const_val->data.i)
@@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
load->value.i[0] = c->data.i;
break;
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
load->value.u[0] = c->data.u;
break;
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index ed374b921fa..06879d64ee2 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
{
assert(instr->op < nir_num_opcodes);
- validate_alu_dest(&instr->dest, state);
-
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
validate_alu_src(instr, i, state);
}
+
+ validate_alu_dest(&instr->dest, state);
}
static void
@@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
validate_src(&instr->src[i], state);
}
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ validate_deref_var(instr, instr->variables[i], state);
+ }
+
if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
unsigned components_written =
nir_intrinsic_infos[instr->intrinsic].dest_components;
@@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
validate_dest(&instr->dest, state);
}
- unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
- for (unsigned i = 0; i < num_vars; i++) {
- validate_deref_var(instr, instr->variables[i], state);
- }
-
switch (instr->intrinsic) {
case nir_intrinsic_load_var: {
const struct glsl_type *type =
@@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
static void
validate_tex_instr(nir_tex_instr *instr, validate_state *state)
{
- validate_dest(&instr->dest, state);
-
bool src_type_seen[nir_num_tex_src_types];
for (unsigned i = 0; i < nir_num_tex_src_types; i++)
src_type_seen[i] = false;
@@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
if (instr->sampler != NULL)
validate_deref_var(instr, instr->sampler, state);
+
+ validate_dest(&instr->dest, state);
}
static void
diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c
index 70610ca0f66..86282d25e0a 100644
--- a/src/glsl/nir/spirv_to_nir.c
+++ b/src/glsl/nir/spirv_to_nir.c
@@ -2026,7 +2026,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
switch (glsl_get_sampler_result_type(sampler_type)) {
case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break;
- case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break;
+ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break;
case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break;
default:
unreachable("Invalid base type for sampler result");
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index e38a0e93058..cd58213c019 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
if (do_graft(&ir->lod_info.bias))
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index e64955e3b3e..00925455b07 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -133,6 +133,8 @@ if HAVE_DRI3
libglx_la_SOURCES += \
dri3_glx.c \
dri3_priv.h
+
+libglx_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
endif
if HAVE_APPLEDRI
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 96f13e6a07b..ee243126731 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -78,40 +78,109 @@
#include "loader.h"
#include "dri2.h"
-static const struct glx_context_vtable dri3_context_vtable;
+static struct dri3_drawable *
+loader_drawable_to_dri3_drawable(struct loader_dri3_drawable *draw) {
+ size_t offset = offsetof(struct dri3_drawable, loader_drawable);
+ return (struct dri3_drawable *)(((void*) draw) - offset);
+}
-static inline void
-dri3_fence_reset(xcb_connection_t *c, struct dri3_buffer *buffer)
+static int
+glx_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
{
- xshmfence_reset(buffer->shm_fence);
+ struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+
+ return priv->swap_interval;
}
-static inline void
-dri3_fence_set(struct dri3_buffer *buffer)
+static int
+glx_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
{
- xshmfence_trigger(buffer->shm_fence);
+ return interval;
}
-static inline void
-dri3_fence_trigger(xcb_connection_t *c, struct dri3_buffer *buffer)
+static void
+glx_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
{
- xcb_sync_trigger_fence(c, buffer->sync_fence);
+ struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+
+ priv->swap_interval = interval;
}
-static inline void
-dri3_fence_await(xcb_connection_t *c, struct dri3_buffer *buffer)
+static void
+glx_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
+ int width, int height)
{
- xcb_flush(c);
- xshmfence_await(buffer->shm_fence);
+ /* Nothing to do */
}
-static inline Bool
-dri3_fence_triggered(struct dri3_buffer *buffer)
+static bool
+glx_dri3_in_current_context(struct loader_dri3_drawable *draw)
{
- return xshmfence_query(buffer->shm_fence);
+ struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+ struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
+ struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
+
+ return (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base;
+}
+
+static __DRIcontext *
+glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
+{
+ struct glx_context *gc = __glXGetCurrentContext();
+
+ if (gc) {
+ struct dri3_context *dri3Ctx = (struct dri3_context *) gc;
+ return dri3Ctx->driContext;
+ }
+
+ return NULL;
}
static void
+glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
+{
+ loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER);
+}
+
+static void
+glx_dri3_show_fps(struct loader_dri3_drawable *draw, uint64_t current_ust)
+{
+ struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+ const uint64_t interval =
+ ((struct dri3_screen *) priv->base.psc)->show_fps_interval;
+
+ if (!interval)
+ return;
+
+ priv->frames++;
+
+ /* DRI3+Present together uses microseconds for UST. */
+ if (priv->previous_ust + interval * 1000000 <= current_ust) {
+ if (priv->previous_ust) {
+ fprintf(stderr, "libGL: FPS = %.1f\n",
+ ((uint64_t) priv->frames * 1000000) /
+ (double)(current_ust - priv->previous_ust));
+ }
+ priv->frames = 0;
+ priv->previous_ust = current_ust;
+ }
+}
+
+static struct loader_dri3_vtable glx_dri3_vtable = {
+ .get_swap_interval = glx_dri3_get_swap_interval,
+ .clamp_swap_interval = glx_dri3_clamp_swap_interval,
+ .set_swap_interval = glx_dri3_set_swap_interval,
+ .set_drawable_size = glx_dri3_set_drawable_size,
+ .in_current_context = glx_dri3_in_current_context,
+ .get_dri_context = glx_dri3_get_dri_context,
+ .flush_drawable = glx_dri3_flush_drawable,
+ .show_fps = glx_dri3_show_fps,
+};
+
+
+static const struct glx_context_vtable dri3_context_vtable;
+
+static void
dri3_destroy_context(struct glx_context *context)
{
struct dri3_context *pcp = (struct dri3_context *) context;
@@ -143,7 +212,8 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old,
return GLXBadDrawable;
if (!(*psc->core->bindContext) (pcp->driContext,
- pdraw->driDrawable, pread->driDrawable))
+ pdraw->loader_drawable.dri_drawable,
+ pread->loader_drawable.dri_drawable))
return GLXBadContext;
return Success;
@@ -265,38 +335,12 @@ dri3_create_context(struct glx_screen *base,
}
static void
-dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer);
-
-static void
-dri3_update_num_back(struct dri3_drawable *priv)
-{
- priv->num_back = 1;
- if (priv->flipping) {
- if (!priv->is_pixmap && !(priv->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC))
- priv->num_back++;
- priv->num_back++;
- }
- if (priv->swap_interval == 0)
- priv->num_back++;
-}
-
-static void
dri3_destroy_drawable(__GLXDRIdrawable *base)
{
- struct dri3_screen *psc = (struct dri3_screen *) base->psc;
struct dri3_drawable *pdraw = (struct dri3_drawable *) base;
- xcb_connection_t *c = XGetXCBConnection(pdraw->base.psc->dpy);
- int i;
-
- (*psc->core->destroyDrawable) (pdraw->driDrawable);
- for (i = 0; i < DRI3_NUM_BUFFERS; i++) {
- if (pdraw->buffers[i])
- dri3_free_render_buffer(pdraw, pdraw->buffers[i]);
- }
+ loader_dri3_drawable_fini(&pdraw->loader_drawable);
- if (pdraw->special_event)
- xcb_unregister_for_special_event(c, pdraw->special_event);
free(pdraw);
}
@@ -307,7 +351,6 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable,
struct dri3_drawable *pdraw;
struct dri3_screen *psc = (struct dri3_screen *) base;
__GLXDRIconfigPrivate *config = (__GLXDRIconfigPrivate *) config_base;
- GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
pdraw = calloc(1, sizeof(*pdraw));
if (!pdraw)
@@ -317,158 +360,21 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable,
pdraw->base.xDrawable = xDrawable;
pdraw->base.drawable = drawable;
pdraw->base.psc = &psc->base;
- pdraw->swap_interval = 1; /* default may be overridden below */
- pdraw->have_back = 0;
- pdraw->have_fake_front = 0;
-
- if (psc->config)
- psc->config->configQueryi(psc->driScreen,
- "vblank_mode", &vblank_mode);
-
- switch (vblank_mode) {
- case DRI_CONF_VBLANK_NEVER:
- case DRI_CONF_VBLANK_DEF_INTERVAL_0:
- pdraw->swap_interval = 0;
- break;
- case DRI_CONF_VBLANK_DEF_INTERVAL_1:
- case DRI_CONF_VBLANK_ALWAYS_SYNC:
- default:
- pdraw->swap_interval = 1;
- break;
- }
-
- dri3_update_num_back(pdraw);
(void) __glXInitialize(psc->base.dpy);
- /* Create a new drawable */
- pdraw->driDrawable =
- (*psc->image_driver->createNewDrawable) (psc->driScreen,
- config->driConfig, pdraw);
-
- if (!pdraw->driDrawable) {
+ if (loader_dri3_drawable_init(XGetXCBConnection(base->dpy),
+ xDrawable, psc->driScreen,
+ psc->is_different_gpu, config->driConfig,
+ &psc->loader_dri3_ext, &glx_dri3_vtable,
+ &pdraw->loader_drawable)) {
free(pdraw);
return NULL;
}
- /*
- * Make sure server has the same swap interval we do for the new
- * drawable.
- */
- if (psc->vtable.setSwapInterval)
- psc->vtable.setSwapInterval(&pdraw->base, pdraw->swap_interval);
-
return &pdraw->base;
}
-static void
-show_fps(struct dri3_drawable *draw, uint64_t current_ust)
-{
- const uint64_t interval =
- ((struct dri3_screen *) draw->base.psc)->show_fps_interval;
-
- draw->frames++;
-
- /* DRI3+Present together uses microseconds for UST. */
- if (draw->previous_ust + interval * 1000000 <= current_ust) {
- if (draw->previous_ust) {
- fprintf(stderr, "libGL: FPS = %.1f\n",
- ((uint64_t) draw->frames * 1000000) /
- (double)(current_ust - draw->previous_ust));
- }
- draw->frames = 0;
- draw->previous_ust = current_ust;
- }
-}
-
-/*
- * Process one Present event
- */
-static void
-dri3_handle_present_event(struct dri3_drawable *priv, xcb_present_generic_event_t *ge)
-{
- struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-
- switch (ge->evtype) {
- case XCB_PRESENT_CONFIGURE_NOTIFY: {
- xcb_present_configure_notify_event_t *ce = (void *) ge;
-
- priv->width = ce->width;
- priv->height = ce->height;
- break;
- }
- case XCB_PRESENT_COMPLETE_NOTIFY: {
- xcb_present_complete_notify_event_t *ce = (void *) ge;
-
- /* Compute the processed SBC number from the received 32-bit serial number merged
- * with the upper 32-bits of the sent 64-bit serial number while checking for
- * wrap
- */
- if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
- priv->recv_sbc = (priv->send_sbc & 0xffffffff00000000LL) | ce->serial;
- if (priv->recv_sbc > priv->send_sbc)
- priv->recv_sbc -= 0x100000000;
- switch (ce->mode) {
- case XCB_PRESENT_COMPLETE_MODE_FLIP:
- priv->flipping = true;
- break;
- case XCB_PRESENT_COMPLETE_MODE_COPY:
- priv->flipping = false;
- break;
- }
- dri3_update_num_back(priv);
-
- if (psc->show_fps_interval)
- show_fps(priv, ce->ust);
-
- priv->ust = ce->ust;
- priv->msc = ce->msc;
- } else {
- priv->recv_msc_serial = ce->serial;
- priv->notify_ust = ce->ust;
- priv->notify_msc = ce->msc;
- }
- break;
- }
- case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
- xcb_present_idle_notify_event_t *ie = (void *) ge;
- int b;
-
- for (b = 0; b < sizeof (priv->buffers) / sizeof (priv->buffers[0]); b++) {
- struct dri3_buffer *buf = priv->buffers[b];
-
- if (buf && buf->pixmap == ie->pixmap) {
- buf->busy = 0;
- if (priv->num_back <= b && b < DRI3_MAX_BACK) {
- dri3_free_render_buffer(priv, buf);
- priv->buffers[b] = NULL;
- }
- break;
- }
- }
- break;
- }
- }
- free(ge);
-}
-
-static bool
-dri3_wait_for_event(__GLXDRIdrawable *pdraw)
-{
- xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
- struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- xcb_generic_event_t *ev;
- xcb_present_generic_event_t *ge;
-
- xcb_flush(c);
- ev = xcb_wait_for_special_event(c, priv->special_event);
- if (!ev)
- return false;
- ge = (void *) ev;
- dri3_handle_present_event(priv, ge);
- return true;
-}
-
/** dri3_wait_for_msc
*
* Get the X server to send an event when the target msc/divisor/remainder is
@@ -478,32 +384,10 @@ static int
dri3_wait_for_msc(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc)
{
- xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- uint32_t msc_serial;
-
- /* Ask for the an event for the target MSC */
- msc_serial = ++priv->send_msc_serial;
- xcb_present_notify_msc(c,
- priv->base.xDrawable,
- msc_serial,
- target_msc,
- divisor,
- remainder);
-
- xcb_flush(c);
-
- /* Wait for the event */
- if (priv->special_event) {
- while ((int32_t) (msc_serial - priv->recv_msc_serial) > 0) {
- if (!dri3_wait_for_event(pdraw))
- return 0;
- }
- }
- *ust = priv->notify_ust;
- *msc = priv->notify_msc;
- *sbc = priv->recv_sbc;
+ loader_dri3_wait_for_msc(&priv->loader_drawable, target_msc, divisor,
+ remainder, ust, msc, sbc);
return 1;
}
@@ -532,101 +416,8 @@ dri3_wait_for_sbc(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust,
{
struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- /* From the GLX_OML_sync_control spec:
- *
- * "If <target_sbc> = 0, the function will block until all previous
- * swaps requested with glXSwapBuffersMscOML for that window have
- * completed."
- */
- if (!target_sbc)
- target_sbc = priv->send_sbc;
-
- while (priv->recv_sbc < target_sbc) {
- if (!dri3_wait_for_event(pdraw))
- return 0;
- }
-
- *ust = priv->ust;
- *msc = priv->msc;
- *sbc = priv->recv_sbc;
- return 1;
-}
-
-/**
- * Asks the driver to flush any queued work necessary for serializing with the
- * X command stream, and optionally the slightly more strict requirement of
- * glFlush() equivalence (which would require flushing even if nothing had
- * been drawn to a window system framebuffer, for example).
- */
-static void
-dri3_flush(struct dri3_screen *psc,
- struct dri3_drawable *draw,
- unsigned flags,
- enum __DRI2throttleReason throttle_reason)
-{
- struct glx_context *gc = __glXGetCurrentContext();
-
- if (gc) {
- struct dri3_context *dri3Ctx = (struct dri3_context *)gc;
-
- (*psc->f->flush_with_flags)(dri3Ctx->driContext, draw->driDrawable, flags, throttle_reason);
- }
-}
-
-static xcb_gcontext_t
-dri3_drawable_gc(struct dri3_drawable *priv)
-{
- if (!priv->gc) {
- uint32_t v;
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
-
- v = 0;
- xcb_create_gc(c,
- (priv->gc = xcb_generate_id(c)),
- priv->base.xDrawable,
- XCB_GC_GRAPHICS_EXPOSURES,
- &v);
- }
- return priv->gc;
-}
-
-static struct dri3_buffer *
-dri3_back_buffer(struct dri3_drawable *priv)
-{
- return priv->buffers[DRI3_BACK_ID(priv->cur_back)];
-}
-
-static struct dri3_buffer *
-dri3_fake_front_buffer(struct dri3_drawable *priv)
-{
- return priv->buffers[DRI3_FRONT_ID];
-}
-
-static void
-dri3_copy_area (xcb_connection_t *c /**< */,
- xcb_drawable_t src_drawable /**< */,
- xcb_drawable_t dst_drawable /**< */,
- xcb_gcontext_t gc /**< */,
- int16_t src_x /**< */,
- int16_t src_y /**< */,
- int16_t dst_x /**< */,
- int16_t dst_y /**< */,
- uint16_t width /**< */,
- uint16_t height /**< */)
-{
- xcb_void_cookie_t cookie;
-
- cookie = xcb_copy_area_checked(c,
- src_drawable,
- dst_drawable,
- gc,
- src_x,
- src_y,
- dst_x,
- dst_y,
- width,
- height);
- xcb_discard_reply(c, cookie.sequence);
+ return loader_dri3_wait_for_sbc(&priv->loader_drawable, target_sbc,
+ ust, msc, sbc);
}
static void
@@ -635,144 +426,27 @@ dri3_copy_sub_buffer(__GLXDRIdrawable *pdraw, int x, int y,
Bool flush)
{
struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- struct dri3_screen *psc = (struct dri3_screen *) pdraw->psc;
- struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
- struct dri3_buffer *back;
-
- unsigned flags = __DRI2_FLUSH_DRAWABLE;
-
- /* Check we have the right attachments */
- if (!priv->have_back || priv->is_pixmap)
- return;
-
- if (flush)
- flags |= __DRI2_FLUSH_CONTEXT;
- dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER);
-
- back = dri3_back_buffer(priv);
- y = priv->height - y - height;
-
- if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
- /* Update the linear buffer part of the back buffer
- * for the dri3_copy_area operation
- */
- psc->image->blitImage(pcp->driContext,
- back->linear_buffer,
- back->image,
- 0, 0, back->width,
- back->height,
- 0, 0, back->width,
- back->height, __BLIT_FLAG_FLUSH);
- /* We use blitImage to update our fake front,
- */
- if (priv->have_fake_front)
- psc->image->blitImage(pcp->driContext,
- dri3_fake_front_buffer(priv)->image,
- back->image,
- x, y, width, height,
- x, y, width, height, __BLIT_FLAG_FLUSH);
- }
-
- dri3_fence_reset(c, back);
- dri3_copy_area(c,
- dri3_back_buffer(priv)->pixmap,
- priv->base.xDrawable,
- dri3_drawable_gc(priv),
- x, y, x, y, width, height);
- dri3_fence_trigger(c, back);
- /* Refresh the fake front (if present) after we just damaged the real
- * front.
- */
- if (priv->have_fake_front && !psc->is_different_gpu) {
- dri3_fence_reset(c, dri3_fake_front_buffer(priv));
- dri3_copy_area(c,
- dri3_back_buffer(priv)->pixmap,
- dri3_fake_front_buffer(priv)->pixmap,
- dri3_drawable_gc(priv),
- x, y, x, y, width, height);
- dri3_fence_trigger(c, dri3_fake_front_buffer(priv));
- dri3_fence_await(c, dri3_fake_front_buffer(priv));
- }
- dri3_fence_await(c, back);
-}
-
-static void
-dri3_copy_drawable(struct dri3_drawable *priv, Drawable dest, Drawable src)
-{
- struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
- dri3_flush(psc, priv, __DRI2_FLUSH_DRAWABLE, 0);
-
- dri3_fence_reset(c, dri3_fake_front_buffer(priv));
- dri3_copy_area(c,
- src, dest,
- dri3_drawable_gc(priv),
- 0, 0, 0, 0, priv->width, priv->height);
- dri3_fence_trigger(c, dri3_fake_front_buffer(priv));
- dri3_fence_await(c, dri3_fake_front_buffer(priv));
+ loader_dri3_copy_sub_buffer(&priv->loader_drawable, x, y,
+ width, height, flush);
}
static void
dri3_wait_x(struct glx_context *gc)
{
- struct dri3_context *pcp = (struct dri3_context *) gc;
struct dri3_drawable *priv = (struct dri3_drawable *)
GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
- struct dri3_screen *psc;
- struct dri3_buffer *front;
- if (priv == NULL || !priv->have_fake_front)
- return;
-
- psc = (struct dri3_screen *) priv->base.psc;
- front = dri3_fake_front_buffer(priv);
-
- dri3_copy_drawable(priv, front->pixmap, priv->base.xDrawable);
-
- /* In the psc->is_different_gpu case, the linear buffer has been updated,
- * but not yet the tiled buffer.
- * Copy back to the tiled buffer we use for rendering.
- * Note that we don't need flushing.
- */
- if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base)
- psc->image->blitImage(pcp->driContext,
- front->image,
- front->linear_buffer,
- 0, 0, front->width,
- front->height,
- 0, 0, front->width,
- front->height, 0);
+ loader_dri3_wait_x(&priv->loader_drawable);
}
static void
dri3_wait_gl(struct glx_context *gc)
{
- struct dri3_context *pcp = (struct dri3_context *) gc;
struct dri3_drawable *priv = (struct dri3_drawable *)
GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
- struct dri3_screen *psc;
- struct dri3_buffer *front;
-
- if (priv == NULL || !priv->have_fake_front)
- return;
-
- psc = (struct dri3_screen *) priv->base.psc;
- front = dri3_fake_front_buffer(priv);
- /* In the psc->is_different_gpu case, we update the linear_buffer
- * before updating the real front.
- */
- if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base)
- psc->image->blitImage(pcp->driContext,
- front->linear_buffer,
- front->image,
- 0, 0, front->width,
- front->height,
- 0, 0, front->width,
- front->height, __BLIT_FLAG_FLUSH);
- dri3_copy_drawable(priv, priv->base.xDrawable, front->pixmap);
+ loader_dri3_wait_gl(&priv->loader_drawable);
}
/**
@@ -782,8 +456,8 @@ dri3_wait_gl(struct glx_context *gc)
static void
dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
{
- struct glx_context *gc;
- struct dri3_drawable *pdraw = loaderPrivate;
+ struct loader_dri3_drawable *draw = loaderPrivate;
+ struct dri3_drawable *pdraw = loader_drawable_to_dri3_drawable(draw);
struct dri3_screen *psc;
if (!pdraw)
@@ -796,699 +470,9 @@ dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
(void) __glXInitialize(psc->base.dpy);
- gc = __glXGetCurrentContext();
+ loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT);
- dri3_flush(psc, pdraw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT);
-
- dri3_wait_gl(gc);
-}
-
-static uint32_t
-dri3_cpp_for_format(uint32_t format) {
- switch (format) {
- case __DRI_IMAGE_FORMAT_R8:
- return 1;
- case __DRI_IMAGE_FORMAT_RGB565:
- case __DRI_IMAGE_FORMAT_GR88:
- return 2;
- case __DRI_IMAGE_FORMAT_XRGB8888:
- case __DRI_IMAGE_FORMAT_ARGB8888:
- case __DRI_IMAGE_FORMAT_ABGR8888:
- case __DRI_IMAGE_FORMAT_XBGR8888:
- case __DRI_IMAGE_FORMAT_XRGB2101010:
- case __DRI_IMAGE_FORMAT_ARGB2101010:
- case __DRI_IMAGE_FORMAT_SARGB8:
- return 4;
- case __DRI_IMAGE_FORMAT_NONE:
- default:
- return 0;
- }
-}
-
-
-/** dri3_alloc_render_buffer
- *
- * Use the driver createImage function to construct a __DRIimage, then
- * get a file descriptor for that and create an X pixmap from that
- *
- * Allocate an xshmfence for synchronization
- */
-static struct dri3_buffer *
-dri3_alloc_render_buffer(struct glx_screen *glx_screen, Drawable draw,
- unsigned int format, int width, int height, int depth)
-{
- struct dri3_screen *psc = (struct dri3_screen *) glx_screen;
- Display *dpy = glx_screen->dpy;
- struct dri3_buffer *buffer;
- __DRIimage *pixmap_buffer;
- xcb_connection_t *c = XGetXCBConnection(dpy);
- xcb_pixmap_t pixmap;
- xcb_sync_fence_t sync_fence;
- struct xshmfence *shm_fence;
- int buffer_fd, fence_fd;
- int stride;
-
- /* Create an xshmfence object and
- * prepare to send that to the X server
- */
-
- fence_fd = xshmfence_alloc_shm();
- if (fence_fd < 0) {
- ErrorMessageF("DRI3 Fence object allocation failure %s\n", strerror(errno));
- return NULL;
- }
- shm_fence = xshmfence_map_shm(fence_fd);
- if (shm_fence == NULL) {
- ErrorMessageF("DRI3 Fence object map failure %s\n", strerror(errno));
- goto no_shm_fence;
- }
-
- /* Allocate the image from the driver
- */
- buffer = calloc(1, sizeof (struct dri3_buffer));
- if (!buffer)
- goto no_buffer;
-
- buffer->cpp = dri3_cpp_for_format(format);
- if (!buffer->cpp) {
- ErrorMessageF("DRI3 buffer format %d invalid\n", format);
- goto no_image;
- }
-
- if (!psc->is_different_gpu) {
- buffer->image = (*psc->image->createImage) (psc->driScreen,
- width, height,
- format,
- __DRI_IMAGE_USE_SHARE |
- __DRI_IMAGE_USE_SCANOUT,
- buffer);
- pixmap_buffer = buffer->image;
-
- if (!buffer->image) {
- ErrorMessageF("DRI3 gpu image creation failure\n");
- goto no_image;
- }
- } else {
- buffer->image = (*psc->image->createImage) (psc->driScreen,
- width, height,
- format,
- 0,
- buffer);
-
- if (!buffer->image) {
- ErrorMessageF("DRI3 other gpu image creation failure\n");
- goto no_image;
- }
-
- buffer->linear_buffer = (*psc->image->createImage) (psc->driScreen,
- width, height,
- format,
- __DRI_IMAGE_USE_SHARE |
- __DRI_IMAGE_USE_LINEAR,
- buffer);
- pixmap_buffer = buffer->linear_buffer;
-
- if (!buffer->linear_buffer) {
- ErrorMessageF("DRI3 gpu linear image creation failure\n");
- goto no_linear_buffer;
- }
- }
-
- /* X wants the stride, so ask the image for it
- */
- if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE, &stride)) {
- ErrorMessageF("DRI3 get image stride failed\n");
- goto no_buffer_attrib;
- }
-
- buffer->pitch = stride;
-
- if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD, &buffer_fd)) {
- ErrorMessageF("DRI3 get image FD failed\n");
- goto no_buffer_attrib;
- }
-
- xcb_dri3_pixmap_from_buffer(c,
- (pixmap = xcb_generate_id(c)),
- draw,
- buffer->size,
- width, height, buffer->pitch,
- depth, buffer->cpp * 8,
- buffer_fd);
-
- xcb_dri3_fence_from_fd(c,
- pixmap,
- (sync_fence = xcb_generate_id(c)),
- false,
- fence_fd);
-
- buffer->pixmap = pixmap;
- buffer->own_pixmap = true;
- buffer->sync_fence = sync_fence;
- buffer->shm_fence = shm_fence;
- buffer->width = width;
- buffer->height = height;
-
- /* Mark the buffer as idle
- */
- dri3_fence_set(buffer);
-
- return buffer;
-
-no_buffer_attrib:
- (*psc->image->destroyImage)(pixmap_buffer);
-no_linear_buffer:
- if (psc->is_different_gpu)
- (*psc->image->destroyImage)(buffer->image);
-no_image:
- free(buffer);
-no_buffer:
- xshmfence_unmap_shm(shm_fence);
-no_shm_fence:
- close(fence_fd);
- ErrorMessageF("DRI3 alloc_render_buffer failed\n");
- return NULL;
-}
-
-/** dri3_free_render_buffer
- *
- * Free everything associated with one render buffer including pixmap, fence
- * stuff and the driver image
- */
-static void
-dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer)
-{
- struct dri3_screen *psc = (struct dri3_screen *) pdraw->base.psc;
- xcb_connection_t *c = XGetXCBConnection(pdraw->base.psc->dpy);
-
- if (buffer->own_pixmap)
- xcb_free_pixmap(c, buffer->pixmap);
- xcb_sync_destroy_fence(c, buffer->sync_fence);
- xshmfence_unmap_shm(buffer->shm_fence);
- (*psc->image->destroyImage)(buffer->image);
- if (buffer->linear_buffer)
- (*psc->image->destroyImage)(buffer->linear_buffer);
- free(buffer);
-}
-
-
-/** dri3_flush_present_events
- *
- * Process any present events that have been received from the X server
- */
-static void
-dri3_flush_present_events(struct dri3_drawable *priv)
-{
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
-
- /* Check to see if any configuration changes have occurred
- * since we were last invoked
- */
- if (priv->special_event) {
- xcb_generic_event_t *ev;
-
- while ((ev = xcb_poll_for_special_event(c, priv->special_event)) != NULL) {
- xcb_present_generic_event_t *ge = (void *) ev;
- dri3_handle_present_event(priv, ge);
- }
- }
-}
-
-/** dri3_update_drawable
- *
- * Called the first time we use the drawable and then
- * after we receive present configure notify events to
- * track the geometry of the drawable
- */
-static int
-dri3_update_drawable(__DRIdrawable *driDrawable, void *loaderPrivate)
-{
- struct dri3_drawable *priv = loaderPrivate;
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
-
- /* First time through, go get the current drawable geometry
- */
- if (priv->width == 0 || priv->height == 0 || priv->depth == 0) {
- xcb_get_geometry_cookie_t geom_cookie;
- xcb_get_geometry_reply_t *geom_reply;
- xcb_void_cookie_t cookie;
- xcb_generic_error_t *error;
- xcb_present_query_capabilities_cookie_t present_capabilities_cookie;
- xcb_present_query_capabilities_reply_t *present_capabilities_reply;
-
-
- /* Try to select for input on the window.
- *
- * If the drawable is a window, this will get our events
- * delivered.
- *
- * Otherwise, we'll get a BadWindow error back from this request which
- * will let us know that the drawable is a pixmap instead.
- */
-
-
- cookie = xcb_present_select_input_checked(c,
- (priv->eid = xcb_generate_id(c)),
- priv->base.xDrawable,
- XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY|
- XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY|
- XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
-
- present_capabilities_cookie = xcb_present_query_capabilities(c, priv->base.xDrawable);
-
- /* Create an XCB event queue to hold present events outside of the usual
- * application event queue
- */
- priv->special_event = xcb_register_for_special_xge(c,
- &xcb_present_id,
- priv->eid,
- priv->stamp);
-
- geom_cookie = xcb_get_geometry(c, priv->base.xDrawable);
-
- geom_reply = xcb_get_geometry_reply(c, geom_cookie, NULL);
-
- if (!geom_reply)
- return false;
-
- priv->width = geom_reply->width;
- priv->height = geom_reply->height;
- priv->depth = geom_reply->depth;
- priv->is_pixmap = false;
-
- free(geom_reply);
-
- /* Check to see if our select input call failed. If it failed with a
- * BadWindow error, then assume the drawable is a pixmap. Destroy the
- * special event queue created above and mark the drawable as a pixmap
- */
-
- error = xcb_request_check(c, cookie);
-
- present_capabilities_reply = xcb_present_query_capabilities_reply(c,
- present_capabilities_cookie,
- NULL);
-
- if (present_capabilities_reply) {
- priv->present_capabilities = present_capabilities_reply->capabilities;
- free(present_capabilities_reply);
- } else
- priv->present_capabilities = 0;
-
- if (error) {
- if (error->error_code != BadWindow) {
- free(error);
- return false;
- }
- priv->is_pixmap = true;
- xcb_unregister_for_special_event(c, priv->special_event);
- priv->special_event = NULL;
- }
- }
- dri3_flush_present_events(priv);
- return true;
-}
-
-/* the DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while
- * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid
- * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and
- * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds
- */
-static int
-image_format_to_fourcc(int format)
-{
-
- /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */
- switch (format) {
- case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888;
- case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565;
- case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888;
- case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888;
- case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888;
- case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888;
- }
- return 0;
-}
-
-/** dri3_get_pixmap_buffer
- *
- * Get the DRM object for a pixmap from the X server and
- * wrap that with a __DRIimage structure using createImageFromFds
- */
-static struct dri3_buffer *
-dri3_get_pixmap_buffer(__DRIdrawable *driDrawable,
- unsigned int format,
- enum dri3_buffer_type buffer_type,
- void *loaderPrivate)
-{
- struct dri3_drawable *pdraw = loaderPrivate;
- int buf_id = dri3_pixmap_buf_id(buffer_type);
- struct dri3_buffer *buffer = pdraw->buffers[buf_id];
- Pixmap pixmap;
- xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
- xcb_dri3_buffer_from_pixmap_reply_t *bp_reply;
- int *fds;
- Display *dpy;
- struct dri3_screen *psc;
- xcb_connection_t *c;
- xcb_sync_fence_t sync_fence;
- struct xshmfence *shm_fence;
- int fence_fd;
- __DRIimage *image_planar;
- int stride, offset;
-
- if (buffer)
- return buffer;
-
- pixmap = pdraw->base.xDrawable;
- psc = (struct dri3_screen *) pdraw->base.psc;
- dpy = psc->base.dpy;
- c = XGetXCBConnection(dpy);
-
- buffer = calloc(1, sizeof (struct dri3_buffer));
- if (!buffer)
- goto no_buffer;
-
- fence_fd = xshmfence_alloc_shm();
- if (fence_fd < 0)
- goto no_fence;
- shm_fence = xshmfence_map_shm(fence_fd);
- if (shm_fence == NULL) {
- close (fence_fd);
- goto no_fence;
- }
-
- xcb_dri3_fence_from_fd(c,
- pixmap,
- (sync_fence = xcb_generate_id(c)),
- false,
- fence_fd);
-
- /* Get an FD for the pixmap object
- */
- bp_cookie = xcb_dri3_buffer_from_pixmap(c, pixmap);
- bp_reply = xcb_dri3_buffer_from_pixmap_reply(c, bp_cookie, NULL);
- if (!bp_reply)
- goto no_image;
- fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply);
-
- stride = bp_reply->stride;
- offset = 0;
-
- /* createImageFromFds creates a wrapper __DRIimage structure which
- * can deal with multiple planes for things like Yuv images. So, once
- * we've gotten the planar wrapper, pull the single plane out of it and
- * discard the wrapper.
- */
- image_planar = (*psc->image->createImageFromFds) (psc->driScreen,
- bp_reply->width,
- bp_reply->height,
- image_format_to_fourcc(format),
- fds, 1,
- &stride, &offset, buffer);
- close(fds[0]);
- if (!image_planar)
- goto no_image;
-
- buffer->image = (*psc->image->fromPlanar)(image_planar, 0, buffer);
-
- (*psc->image->destroyImage)(image_planar);
-
- if (!buffer->image)
- goto no_image;
-
- buffer->pixmap = pixmap;
- buffer->own_pixmap = false;
- buffer->width = bp_reply->width;
- buffer->height = bp_reply->height;
- buffer->buffer_type = buffer_type;
- buffer->shm_fence = shm_fence;
- buffer->sync_fence = sync_fence;
-
- pdraw->buffers[buf_id] = buffer;
- return buffer;
-
-no_image:
- xcb_sync_destroy_fence(c, sync_fence);
- xshmfence_unmap_shm(shm_fence);
-no_fence:
- free(buffer);
-no_buffer:
- return NULL;
-}
-
-/** dri3_find_back
- *
- * Find an idle back buffer. If there isn't one, then
- * wait for a present idle notify event from the X server
- */
-static int
-dri3_find_back(xcb_connection_t *c, struct dri3_drawable *priv)
-{
- int b;
- xcb_generic_event_t *ev;
- xcb_present_generic_event_t *ge;
-
- for (;;) {
- for (b = 0; b < priv->num_back; b++) {
- int id = DRI3_BACK_ID((b + priv->cur_back) % priv->num_back);
- struct dri3_buffer *buffer = priv->buffers[id];
-
- if (!buffer || !buffer->busy) {
- priv->cur_back = id;
- return id;
- }
- }
- xcb_flush(c);
- ev = xcb_wait_for_special_event(c, priv->special_event);
- if (!ev)
- return -1;
- ge = (void *) ev;
- dri3_handle_present_event(priv, ge);
- }
-}
-
-/** dri3_get_buffer
- *
- * Find a front or back buffer, allocating new ones as necessary
- */
-static struct dri3_buffer *
-dri3_get_buffer(__DRIdrawable *driDrawable,
- unsigned int format,
- enum dri3_buffer_type buffer_type,
- void *loaderPrivate)
-{
- struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
- struct dri3_drawable *priv = loaderPrivate;
- struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
- xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
- struct dri3_buffer *buffer;
- int buf_id;
-
- if (buffer_type == dri3_buffer_back) {
- buf_id = dri3_find_back(c, priv);
-
- if (buf_id < 0)
- return NULL;
- } else {
- buf_id = DRI3_FRONT_ID;
- }
-
- buffer = priv->buffers[buf_id];
-
- /* Allocate a new buffer if there isn't an old one, or if that
- * old one is the wrong size
- */
- if (!buffer || buffer->width != priv->width || buffer->height != priv->height) {
- struct dri3_buffer *new_buffer;
-
- /* Allocate the new buffers
- */
- new_buffer = dri3_alloc_render_buffer(priv->base.psc,
- priv->base.xDrawable,
- format, priv->width, priv->height, priv->depth);
- if (!new_buffer)
- return NULL;
-
- /* When resizing, copy the contents of the old buffer, waiting for that
- * copy to complete using our fences before proceeding
- */
- switch (buffer_type) {
- case dri3_buffer_back:
- if (buffer) {
- if (!buffer->linear_buffer) {
- dri3_fence_reset(c, new_buffer);
- dri3_fence_await(c, buffer);
- dri3_copy_area(c,
- buffer->pixmap,
- new_buffer->pixmap,
- dri3_drawable_gc(priv),
- 0, 0, 0, 0, priv->width, priv->height);
- dri3_fence_trigger(c, new_buffer);
- } else if ((&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
- psc->image->blitImage(pcp->driContext,
- new_buffer->image,
- buffer->image,
- 0, 0, priv->width,
- priv->height,
- 0, 0, priv->width,
- priv->height, 0);
- }
- dri3_free_render_buffer(priv, buffer);
- }
- break;
- case dri3_buffer_front:
- dri3_fence_reset(c, new_buffer);
- dri3_copy_area(c,
- priv->base.xDrawable,
- new_buffer->pixmap,
- dri3_drawable_gc(priv),
- 0, 0, 0, 0, priv->width, priv->height);
- dri3_fence_trigger(c, new_buffer);
-
- if (new_buffer->linear_buffer && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
- dri3_fence_await(c, new_buffer);
- psc->image->blitImage(pcp->driContext,
- new_buffer->image,
- new_buffer->linear_buffer,
- 0, 0, priv->width,
- priv->height,
- 0, 0, priv->width,
- priv->height, 0);
- }
- break;
- }
- buffer = new_buffer;
- buffer->buffer_type = buffer_type;
- priv->buffers[buf_id] = buffer;
- }
- dri3_fence_await(c, buffer);
-
- /* Return the requested buffer */
- return buffer;
-}
-
-/** dri3_free_buffers
- *
- * Free the front bufffer or all of the back buffers. Used
- * when the application changes which buffers it needs
- */
-static void
-dri3_free_buffers(__DRIdrawable *driDrawable,
- enum dri3_buffer_type buffer_type,
- void *loaderPrivate)
-{
- struct dri3_drawable *priv = loaderPrivate;
- struct dri3_buffer *buffer;
- int first_id;
- int n_id;
- int buf_id;
-
- switch (buffer_type) {
- case dri3_buffer_back:
- first_id = DRI3_BACK_ID(0);
- n_id = DRI3_MAX_BACK;
- break;
- case dri3_buffer_front:
- first_id = DRI3_FRONT_ID;
- n_id = 1;
- }
-
- for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) {
- buffer = priv->buffers[buf_id];
- if (buffer) {
- dri3_free_render_buffer(priv, buffer);
- priv->buffers[buf_id] = NULL;
- }
- }
-}
-
-/** dri3_get_buffers
- *
- * The published buffer allocation API.
- * Returns all of the necessary buffers, allocating
- * as needed.
- */
-static int
-dri3_get_buffers(__DRIdrawable *driDrawable,
- unsigned int format,
- uint32_t *stamp,
- void *loaderPrivate,
- uint32_t buffer_mask,
- struct __DRIimageList *buffers)
-{
- struct dri3_drawable *priv = loaderPrivate;
- struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
- struct dri3_buffer *front, *back;
-
- buffers->image_mask = 0;
- buffers->front = NULL;
- buffers->back = NULL;
-
- front = NULL;
- back = NULL;
-
- if (!dri3_update_drawable(driDrawable, loaderPrivate))
- return false;
-
- /* pixmaps always have front buffers */
- if (priv->is_pixmap)
- buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
-
- if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) {
- /* All pixmaps are owned by the server gpu.
- * When we use a different gpu, we can't use the pixmap
- * as buffer since it is potentially tiled a way
- * our device can't understand. In this case, use
- * a fake front buffer. Hopefully the pixmap
- * content will get synced with the fake front
- * buffer.
- */
- if (priv->is_pixmap && !psc->is_different_gpu)
- front = dri3_get_pixmap_buffer(driDrawable,
- format,
- dri3_buffer_front,
- loaderPrivate);
- else
- front = dri3_get_buffer(driDrawable,
- format,
- dri3_buffer_front,
- loaderPrivate);
-
- if (!front)
- return false;
- } else {
- dri3_free_buffers(driDrawable, dri3_buffer_front, loaderPrivate);
- priv->have_fake_front = 0;
- }
-
- if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) {
- back = dri3_get_buffer(driDrawable,
- format,
- dri3_buffer_back,
- loaderPrivate);
- if (!back)
- return false;
- priv->have_back = 1;
- } else {
- dri3_free_buffers(driDrawable, dri3_buffer_back, loaderPrivate);
- priv->have_back = 0;
- }
-
- if (front) {
- buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT;
- buffers->front = front->image;
- priv->have_fake_front = psc->is_different_gpu || !priv->is_pixmap;
- }
-
- if (back) {
- buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK;
- buffers->back = back->image;
- }
-
- priv->stamp = stamp;
-
- return true;
+ loader_dri3_wait_gl(draw);
}
/* The image loader extension record for DRI3
@@ -1496,7 +480,7 @@ dri3_get_buffers(__DRIdrawable *driDrawable,
static const __DRIimageLoaderExtension imageLoaderExtension = {
.base = { __DRI_IMAGE_LOADER, 1 },
- .getBuffers = dri3_get_buffers,
+ .getBuffers = loader_dri3_get_buffers,
.flushFrontBuffer = dri3_flush_front_buffer,
};
@@ -1519,172 +503,25 @@ static int64_t
dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
int64_t remainder, Bool flush)
{
- struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
- Display *dpy = priv->base.psc->dpy;
- xcb_connection_t *c = XGetXCBConnection(dpy);
- struct dri3_buffer *back;
- int64_t ret = 0;
- uint32_t options = XCB_PRESENT_OPTION_NONE;
-
unsigned flags = __DRI2_FLUSH_DRAWABLE;
+
if (flush)
flags |= __DRI2_FLUSH_CONTEXT;
- dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER);
-
- back = priv->buffers[DRI3_BACK_ID(priv->cur_back)];
- if (psc->is_different_gpu && back) {
- /* Update the linear buffer before presenting the pixmap */
- psc->image->blitImage(pcp->driContext,
- back->linear_buffer,
- back->image,
- 0, 0, back->width,
- back->height,
- 0, 0, back->width,
- back->height, __BLIT_FLAG_FLUSH);
- /* Update the fake front */
- if (priv->have_fake_front)
- psc->image->blitImage(pcp->driContext,
- priv->buffers[DRI3_FRONT_ID]->image,
- back->image,
- 0, 0, priv->width,
- priv->height,
- 0, 0, priv->width,
- priv->height, __BLIT_FLAG_FLUSH);
- }
-
- dri3_flush_present_events(priv);
-
- if (back && !priv->is_pixmap) {
- dri3_fence_reset(c, back);
-
- /* Compute when we want the frame shown by taking the last known successful
- * MSC and adding in a swap interval for each outstanding swap request.
- * target_msc=divisor=remainder=0 means "Use glXSwapBuffers() semantic"
- */
- ++priv->send_sbc;
- if (target_msc == 0 && divisor == 0 && remainder == 0)
- target_msc = priv->msc + priv->swap_interval * (priv->send_sbc - priv->recv_sbc);
- else if (divisor == 0 && remainder > 0) {
- /* From the GLX_OML_sync_control spec:
- *
- * "If <divisor> = 0, the swap will occur when MSC becomes
- * greater than or equal to <target_msc>."
- *
- * Note that there's no mention of the remainder. The Present extension
- * throws BadValue for remainder != 0 with divisor == 0, so just drop
- * the passed in value.
- */
- remainder = 0;
- }
-
- /* From the GLX_EXT_swap_control spec:
- *
- * "If <interval> is set to a value of 0, buffer swaps are not
- * synchronized to a video frame."
- *
- * Implementation note: It is possible to enable triple buffering behaviour
- * by not using XCB_PRESENT_OPTION_ASYNC, but this should not be the default.
- */
- if (priv->swap_interval == 0)
- options |= XCB_PRESENT_OPTION_ASYNC;
-
- back->busy = 1;
- back->last_swap = priv->send_sbc;
- xcb_present_pixmap(c,
- priv->base.xDrawable,
- back->pixmap,
- (uint32_t) priv->send_sbc,
- 0, /* valid */
- 0, /* update */
- 0, /* x_off */
- 0, /* y_off */
- None, /* target_crtc */
- None,
- back->sync_fence,
- options,
- target_msc,
- divisor,
- remainder, 0, NULL);
- ret = (int64_t) priv->send_sbc;
-
- /* If there's a fake front, then copy the source back buffer
- * to the fake front to keep it up to date. This needs
- * to reset the fence and make future users block until
- * the X server is done copying the bits
- */
- if (priv->have_fake_front && !psc->is_different_gpu) {
- dri3_fence_reset(c, priv->buffers[DRI3_FRONT_ID]);
- dri3_copy_area(c,
- back->pixmap,
- priv->buffers[DRI3_FRONT_ID]->pixmap,
- dri3_drawable_gc(priv),
- 0, 0, 0, 0, priv->width, priv->height);
- dri3_fence_trigger(c, priv->buffers[DRI3_FRONT_ID]);
- }
- xcb_flush(c);
- if (priv->stamp)
- ++(*priv->stamp);
- }
- (*psc->f->invalidate)(priv->driDrawable);
-
- return ret;
+ return loader_dri3_swap_buffers_msc(&priv->loader_drawable,
+ target_msc, divisor, remainder,
+ flags, false);
}
static int
dri3_get_buffer_age(__GLXDRIdrawable *pdraw)
{
- xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
- struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
- int back_id = DRI3_BACK_ID(dri3_find_back(c, priv));
-
- if (back_id < 0 || !priv->buffers[back_id])
- return 0;
-
- if (priv->buffers[back_id]->last_swap != 0)
- return priv->send_sbc - priv->buffers[back_id]->last_swap + 1;
- else
- return 0;
-}
-
-/** dri3_open
- *
- * Wrapper around xcb_dri3_open
- */
-static int
-dri3_open(Display *dpy,
- Window root,
- CARD32 provider)
-{
- xcb_dri3_open_cookie_t cookie;
- xcb_dri3_open_reply_t *reply;
- xcb_connection_t *c = XGetXCBConnection(dpy);
- int fd;
-
- cookie = xcb_dri3_open(c,
- root,
- provider);
-
- reply = xcb_dri3_open_reply(c, cookie, NULL);
- if (!reply)
- return -1;
-
- if (reply->nfd != 1) {
- free(reply);
- return -1;
- }
+ struct dri3_drawable *priv = (struct dri3_drawable *)pdraw;
- fd = xcb_dri3_open_reply_fds(c, reply)[0];
- fcntl(fd, F_SETFD, FD_CLOEXEC);
-
- free(reply);
-
- return fd;
+ return loader_dri3_query_buffer_age(&priv->loader_drawable);
}
-
/** dri3_destroy_screen
*/
static void
@@ -1727,8 +564,7 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval)
break;
}
- priv->swap_interval = interval;
- dri3_update_num_back(priv);
+ loader_dri3_set_swap_interval(&priv->loader_drawable, interval);
return 0;
}
@@ -1759,14 +595,14 @@ dri3_bind_tex_image(Display * dpy,
if (pdraw != NULL) {
psc = (struct dri3_screen *) base->psc;
- (*psc->f->invalidate)(pdraw->driDrawable);
+ (*psc->f->invalidate)(pdraw->loader_drawable.dri_drawable);
XSync(dpy, false);
(*psc->texBuffer->setTexBuffer2) (pcp->driContext,
pdraw->base.textureTarget,
pdraw->base.textureFormat,
- pdraw->driDrawable);
+ pdraw->loader_drawable.dri_drawable);
}
}
@@ -1786,7 +622,7 @@ dri3_release_tex_image(Display * dpy, GLXDrawable drawable, int buffer)
psc->texBuffer->releaseTexBuffer != NULL)
(*psc->texBuffer->releaseTexBuffer) (pcp->driContext,
pdraw->base.textureTarget,
- pdraw->driDrawable);
+ pdraw->loader_drawable.dri_drawable);
}
}
@@ -1908,7 +744,7 @@ dri3_create_screen(int screen, struct glx_display * priv)
return NULL;
}
- psc->fd = dri3_open(priv->dpy, RootWindow(priv->dpy, screen), None);
+ psc->fd = loader_dri3_open(c, RootWindow(priv->dpy, screen), None);
if (psc->fd < 0) {
int conn_error = xcb_connection_has_error(c);
@@ -2000,6 +836,13 @@ dri3_create_screen(int screen, struct glx_display * priv)
goto handle_error;
}
+ psc->loader_dri3_ext.core = psc->core;
+ psc->loader_dri3_ext.image_driver = psc->image_driver;
+ psc->loader_dri3_ext.flush = psc->f;
+ psc->loader_dri3_ext.tex_buffer = psc->texBuffer;
+ psc->loader_dri3_ext.image = psc->image;
+ psc->loader_dri3_ext.config = psc->config;
+
configs = driConvertConfigs(psc->core, psc->base.configs, driver_configs);
visuals = driConvertConfigs(psc->core, psc->base.visuals, driver_configs);
diff --git a/src/glx/dri3_priv.h b/src/glx/dri3_priv.h
index 160444907e6..56a63309f36 100644
--- a/src/glx/dri3_priv.h
+++ b/src/glx/dri3_priv.h
@@ -59,50 +59,14 @@
#include <xcb/present.h>
#include <xcb/sync.h>
+#include "loader_dri3_helper.h"
+
/* From xmlpool/options.h, user exposed so should be stable */
#define DRI_CONF_VBLANK_NEVER 0
#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
-enum dri3_buffer_type {
- dri3_buffer_back = 0,
- dri3_buffer_front = 1
-};
-
-struct dri3_buffer {
- __DRIimage *image;
- __DRIimage *linear_buffer;
- uint32_t pixmap;
-
- /* Synchronization between the client and X server is done using an
- * xshmfence that is mapped into an X server SyncFence. This lets the
- * client check whether the X server is done using a buffer with a simple
- * xshmfence call, rather than going to read X events from the wire.
- *
- * However, we can only wait for one xshmfence to be triggered at a time,
- * so we need to know *which* buffer is going to be idle next. We do that
- * by waiting for a PresentIdleNotify event. When that event arrives, the
- * 'busy' flag gets cleared and the client knows that the fence has been
- * triggered, and that the wait call will not block.
- */
-
- uint32_t sync_fence; /* XID of X SyncFence object */
- struct xshmfence *shm_fence; /* pointer to xshmfence object */
- GLboolean busy; /* Set on swap, cleared on IdleNotify */
- GLboolean own_pixmap; /* We allocated the pixmap ID, free on destroy */
- void *driverPrivate;
-
- uint32_t size;
- uint32_t pitch;
- uint32_t cpp;
- uint32_t flags;
- uint32_t width, height;
- uint64_t last_swap;
-
- enum dri3_buffer_type buffer_type;
-};
-
struct dri3_display
{
__GLXDRIdisplay base;
@@ -139,6 +103,8 @@ struct dri3_screen {
int is_different_gpu;
int show_fps_interval;
+
+ struct loader_dri3_extensions loader_dri3_ext;
};
struct dri3_context
@@ -147,60 +113,10 @@ struct dri3_context
__DRIcontext *driContext;
};
-#define DRI3_MAX_BACK 4
-#define DRI3_BACK_ID(i) (i)
-#define DRI3_FRONT_ID (DRI3_MAX_BACK)
-
-static inline int
-dri3_pixmap_buf_id(enum dri3_buffer_type buffer_type)
-{
- if (buffer_type == dri3_buffer_back)
- return DRI3_BACK_ID(0);
- else
- return DRI3_FRONT_ID;
-}
-
-#define DRI3_NUM_BUFFERS (1 + DRI3_MAX_BACK)
-
struct dri3_drawable {
__GLXDRIdrawable base;
- __DRIdrawable *driDrawable;
- int width, height, depth;
+ struct loader_dri3_drawable loader_drawable;
int swap_interval;
- uint8_t have_back;
- uint8_t have_fake_front;
- uint8_t is_pixmap;
- uint8_t flipping;
-
- /* Present extension capabilities
- */
- uint32_t present_capabilities;
-
- /* SBC numbers are tracked by using the serial numbers
- * in the present request and complete events
- */
- uint64_t send_sbc;
- uint64_t recv_sbc;
-
- /* Last received UST/MSC values for pixmap present complete */
- uint64_t ust, msc;
-
- /* Last received UST/MSC values from present notify msc event */
- uint64_t notify_ust, notify_msc;
-
- /* Serial numbers for tracking wait_for_msc events */
- uint32_t send_msc_serial;
- uint32_t recv_msc_serial;
-
- struct dri3_buffer *buffers[DRI3_NUM_BUFFERS];
- int cur_back;
- int num_back;
-
- uint32_t *stamp;
-
- xcb_present_event_t eid;
- xcb_gcontext_t gc;
- xcb_special_event_t *special_event;
/* LIBGL_SHOW_FPS support */
uint64_t previous_ust;
diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index 5190f7f8a46..9ca17540d54 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -25,13 +25,16 @@ EXTRA_DIST = SConscript
noinst_LTLIBRARIES = libloader.la
-libloader_la_CPPFLAGS = \
+AM_CPPFLAGS = \
$(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
$(VISIBILITY_CFLAGS) \
+ $(XCB_DRI3_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
$(LIBUDEV_CFLAGS)
+libloader_la_CPPFLAGS = $(AM_CPPFLAGS)
libloader_la_SOURCES = $(LOADER_C_FILES)
libloader_la_LIBADD =
@@ -49,9 +52,15 @@ libloader_la_CPPFLAGS += \
endif
if HAVE_LIBDRM
-libloader_la_CPPFLAGS += \
- $(LIBDRM_CFLAGS)
-
libloader_la_LIBADD += \
$(LIBDRM_LIBS)
endif
+
+if HAVE_DRI3
+noinst_LTLIBRARIES += libloader_dri3_helper.la
+
+libloader_dri3_helper_la_SOURCES = \
+ loader_dri3_helper.c \
+ loader_dri3_helper.h
+libloader_dri3_helper_la_LIBADD = $(XCB_DRI3_LIBS)
+endif
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
new file mode 100644
index 00000000000..62bfe845c08
--- /dev/null
+++ b/src/loader/loader_dri3_helper.c
@@ -0,0 +1,1396 @@
+/*
+ * Copyright © 2013 Keith Packard
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <X11/xshmfence.h>
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <X11/Xlib-xcb.h>
+
+#include "loader_dri3_helper.h"
+
+/* From xmlpool/options.h, user exposed so should be stable */
+#define DRI_CONF_VBLANK_NEVER 0
+#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
+#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
+#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
+
+static inline void
+dri3_fence_reset(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+ xshmfence_reset(buffer->shm_fence);
+}
+
+static inline void
+dri3_fence_set(struct loader_dri3_buffer *buffer)
+{
+ xshmfence_trigger(buffer->shm_fence);
+}
+
+static inline void
+dri3_fence_trigger(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+ xcb_sync_trigger_fence(c, buffer->sync_fence);
+}
+
+static inline void
+dri3_fence_await(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+ xcb_flush(c);
+ xshmfence_await(buffer->shm_fence);
+}
+
+static void
+dri3_update_num_back(struct loader_dri3_drawable *draw)
+{
+ draw->num_back = 1;
+ if (draw->flipping) {
+ if (!draw->is_pixmap &&
+ !(draw->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC))
+ draw->num_back++;
+ draw->num_back++;
+ }
+ if (draw->vtable->get_swap_interval(draw) == 0)
+ draw->num_back++;
+}
+
+void
+loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+ interval = draw->vtable->clamp_swap_interval(draw, interval);
+ draw->vtable->set_swap_interval(draw, interval);
+ dri3_update_num_back(draw);
+}
+
+/** dri3_free_render_buffer
+ *
+ * Free everything associated with one render buffer including pixmap, fence
+ * stuff and the driver image
+ */
+static void
+dri3_free_render_buffer(struct loader_dri3_drawable *draw,
+ struct loader_dri3_buffer *buffer)
+{
+ if (buffer->own_pixmap)
+ xcb_free_pixmap(draw->conn, buffer->pixmap);
+ xcb_sync_destroy_fence(draw->conn, buffer->sync_fence);
+ xshmfence_unmap_shm(buffer->shm_fence);
+ (draw->ext->image->destroyImage)(buffer->image);
+ if (buffer->linear_buffer)
+ (draw->ext->image->destroyImage)(buffer->linear_buffer);
+ free(buffer);
+}
+
+void
+loader_dri3_drawable_fini(struct loader_dri3_drawable *draw)
+{
+ int i;
+
+ (draw->ext->core->destroyDrawable)(draw->dri_drawable);
+
+ for (i = 0; i < LOADER_DRI3_NUM_BUFFERS; i++) {
+ if (draw->buffers[i])
+ dri3_free_render_buffer(draw, draw->buffers[i]);
+ }
+
+ if (draw->special_event)
+ xcb_unregister_for_special_event(draw->conn, draw->special_event);
+}
+
+int
+loader_dri3_drawable_init(xcb_connection_t *conn,
+ xcb_drawable_t drawable,
+ __DRIscreen *dri_screen,
+ bool is_different_gpu,
+ const __DRIconfig *dri_config,
+ struct loader_dri3_extensions *ext,
+ struct loader_dri3_vtable *vtable,
+ struct loader_dri3_drawable *draw)
+{
+ xcb_get_geometry_cookie_t cookie;
+ xcb_get_geometry_reply_t *reply;
+ xcb_generic_error_t *error;
+ GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
+ int swap_interval;
+
+ draw->conn = conn;
+ draw->ext = ext;
+ draw->vtable = vtable;
+ draw->drawable = drawable;
+ draw->dri_screen = dri_screen;
+ draw->is_different_gpu = is_different_gpu;
+
+ draw->have_back = 0;
+ draw->have_fake_front = 0;
+ draw->first_init = true;
+
+ if (draw->ext->config)
+ draw->ext->config->configQueryi(draw->dri_screen,
+ "vblank_mode", &vblank_mode);
+
+ switch (vblank_mode) {
+ case DRI_CONF_VBLANK_NEVER:
+ case DRI_CONF_VBLANK_DEF_INTERVAL_0:
+ swap_interval = 0;
+ break;
+ case DRI_CONF_VBLANK_DEF_INTERVAL_1:
+ case DRI_CONF_VBLANK_ALWAYS_SYNC:
+ default:
+ swap_interval = 1;
+ break;
+ }
+ draw->vtable->set_swap_interval(draw, swap_interval);
+
+ dri3_update_num_back(draw);
+
+ /* Create a new drawable */
+ draw->dri_drawable =
+ (draw->ext->image_driver->createNewDrawable)(dri_screen,
+ dri_config,
+ draw);
+
+ if (!draw->dri_drawable)
+ return 1;
+
+ cookie = xcb_get_geometry(draw->conn, draw->drawable);
+ reply = xcb_get_geometry_reply(draw->conn, cookie, &error);
+ if (reply == NULL || error != NULL) {
+ draw->ext->core->destroyDrawable(draw->dri_drawable);
+ return 1;
+ }
+
+ draw->width = reply->width;
+ draw->height = reply->height;
+ draw->depth = reply->depth;
+ draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+ free(reply);
+
+ /*
+ * Make sure server has the same swap interval we do for the new
+ * drawable.
+ */
+ loader_dri3_set_swap_interval(draw, swap_interval);
+
+ return 0;
+}
+
+/*
+ * Process one Present event
+ */
+static void
+dri3_handle_present_event(struct loader_dri3_drawable *draw,
+ xcb_present_generic_event_t *ge)
+{
+ switch (ge->evtype) {
+ case XCB_PRESENT_CONFIGURE_NOTIFY: {
+ xcb_present_configure_notify_event_t *ce = (void *) ge;
+
+ draw->width = ce->width;
+ draw->height = ce->height;
+ draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+ break;
+ }
+ case XCB_PRESENT_COMPLETE_NOTIFY: {
+ xcb_present_complete_notify_event_t *ce = (void *) ge;
+
+ /* Compute the processed SBC number from the received 32-bit serial number
+ * merged with the upper 32-bits of the sent 64-bit serial number while
+ * checking for wrap.
+ */
+ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
+ draw->recv_sbc = (draw->send_sbc & 0xffffffff00000000LL) | ce->serial;
+ if (draw->recv_sbc > draw->send_sbc)
+ draw->recv_sbc -= 0x100000000;
+ switch (ce->mode) {
+ case XCB_PRESENT_COMPLETE_MODE_FLIP:
+ draw->flipping = true;
+ break;
+ case XCB_PRESENT_COMPLETE_MODE_COPY:
+ draw->flipping = false;
+ break;
+ }
+ dri3_update_num_back(draw);
+
+ if (draw->vtable->show_fps)
+ draw->vtable->show_fps(draw, ce->ust);
+
+ draw->ust = ce->ust;
+ draw->msc = ce->msc;
+ } else {
+ draw->recv_msc_serial = ce->serial;
+ draw->notify_ust = ce->ust;
+ draw->notify_msc = ce->msc;
+ }
+ break;
+ }
+ case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
+ xcb_present_idle_notify_event_t *ie = (void *) ge;
+ int b;
+
+ for (b = 0; b < sizeof(draw->buffers) / sizeof(draw->buffers[0]); b++) {
+ struct loader_dri3_buffer *buf = draw->buffers[b];
+
+ if (buf && buf->pixmap == ie->pixmap) {
+ buf->busy = 0;
+ if (draw->num_back <= b && b < LOADER_DRI3_MAX_BACK) {
+ dri3_free_render_buffer(draw, buf);
+ draw->buffers[b] = NULL;
+ }
+ break;
+ }
+ }
+ break;
+ }
+ }
+ free(ge);
+}
+
+static bool
+dri3_wait_for_event(struct loader_dri3_drawable *draw)
+{
+ xcb_generic_event_t *ev;
+ xcb_present_generic_event_t *ge;
+
+ xcb_flush(draw->conn);
+ ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
+ if (!ev)
+ return false;
+ ge = (void *) ev;
+ dri3_handle_present_event(draw, ge);
+ return true;
+}
+
+/** loader_dri3_wait_for_msc
+ *
+ * Get the X server to send an event when the target msc/divisor/remainder is
+ * reached.
+ */
+bool
+loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw,
+ int64_t target_msc,
+ int64_t divisor, int64_t remainder,
+ int64_t *ust, int64_t *msc, int64_t *sbc)
+{
+ uint32_t msc_serial;
+
+ msc_serial = ++draw->send_msc_serial;
+ xcb_present_notify_msc(draw->conn,
+ draw->drawable,
+ msc_serial,
+ target_msc,
+ divisor,
+ remainder);
+
+ xcb_flush(draw->conn);
+
+ /* Wait for the event */
+ if (draw->special_event) {
+ while ((int32_t) (msc_serial - draw->recv_msc_serial) > 0) {
+ if (!dri3_wait_for_event(draw))
+ return false;
+ }
+ }
+
+ *ust = draw->notify_ust;
+ *msc = draw->notify_msc;
+ *sbc = draw->recv_sbc;
+
+ return true;
+}
+
+/** loader_dri3_wait_for_sbc
+ *
+ * Wait for the completed swap buffer count to reach the specified
+ * target. Presumably the application knows that this will be reached with
+ * outstanding complete events, or we're going to be here awhile.
+ */
+int
+loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw,
+ int64_t target_sbc, int64_t *ust,
+ int64_t *msc, int64_t *sbc)
+{
+ /* From the GLX_OML_sync_control spec:
+ *
+ * "If <target_sbc> = 0, the function will block until all previous
+ * swaps requested with glXSwapBuffersMscOML for that window have
+ * completed."
+ */
+ if (!target_sbc)
+ target_sbc = draw->send_sbc;
+
+ while (draw->recv_sbc < target_sbc) {
+ if (!dri3_wait_for_event(draw))
+ return 0;
+ }
+
+ *ust = draw->ust;
+ *msc = draw->msc;
+ *sbc = draw->recv_sbc;
+ return 1;
+}
+
+/** loader_dri3_find_back
+ *
+ * Find an idle back buffer. If there isn't one, then
+ * wait for a present idle notify event from the X server
+ */
+static int
+dri3_find_back(struct loader_dri3_drawable *draw)
+{
+ int b;
+ xcb_generic_event_t *ev;
+ xcb_present_generic_event_t *ge;
+
+ for (;;) {
+ for (b = 0; b < draw->num_back; b++) {
+ int id = LOADER_DRI3_BACK_ID((b + draw->cur_back) % draw->num_back);
+ struct loader_dri3_buffer *buffer = draw->buffers[id];
+
+ if (!buffer || !buffer->busy) {
+ draw->cur_back = id;
+ return id;
+ }
+ }
+ xcb_flush(draw->conn);
+ ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
+ if (!ev)
+ return -1;
+ ge = (void *) ev;
+ dri3_handle_present_event(draw, ge);
+ }
+}
+
+static xcb_gcontext_t
+dri3_drawable_gc(struct loader_dri3_drawable *draw)
+{
+ if (!draw->gc) {
+ uint32_t v = 0;
+ xcb_create_gc(draw->conn,
+ (draw->gc = xcb_generate_id(draw->conn)),
+ draw->drawable,
+ XCB_GC_GRAPHICS_EXPOSURES,
+ &v);
+ }
+ return draw->gc;
+}
+
+
+static struct loader_dri3_buffer *
+dri3_back_buffer(struct loader_dri3_drawable *draw)
+{
+ return draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)];
+}
+
+static struct loader_dri3_buffer *
+dri3_fake_front_buffer(struct loader_dri3_drawable *draw)
+{
+ return draw->buffers[LOADER_DRI3_FRONT_ID];
+}
+
+static void
+dri3_copy_area(xcb_connection_t *c,
+ xcb_drawable_t src_drawable,
+ xcb_drawable_t dst_drawable,
+ xcb_gcontext_t gc,
+ int16_t src_x,
+ int16_t src_y,
+ int16_t dst_x,
+ int16_t dst_y,
+ uint16_t width,
+ uint16_t height)
+{
+ xcb_void_cookie_t cookie;
+
+ cookie = xcb_copy_area_checked(c,
+ src_drawable,
+ dst_drawable,
+ gc,
+ src_x,
+ src_y,
+ dst_x,
+ dst_y,
+ width,
+ height);
+ xcb_discard_reply(c, cookie.sequence);
+}
+
+/**
+ * Asks the driver to flush any queued work necessary for serializing with the
+ * X command stream, and optionally the slightly more strict requirement of
+ * glFlush() equivalence (which would require flushing even if nothing had
+ * been drawn to a window system framebuffer, for example).
+ */
+void
+loader_dri3_flush(struct loader_dri3_drawable *draw,
+ unsigned flags,
+ enum __DRI2throttleReason throttle_reason)
+{
+ /* NEED TO CHECK WHETHER CONTEXT IS NULL */
+ __DRIcontext *dri_context = draw->vtable->get_dri_context(draw);
+
+ if (dri_context) {
+ draw->ext->flush->flush_with_flags(dri_context, draw->dri_drawable,
+ flags, throttle_reason);
+ }
+}
+
+void
+loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
+ int x, int y,
+ int width, int height,
+ bool flush)
+{
+ struct loader_dri3_buffer *back;
+ unsigned flags = __DRI2_FLUSH_DRAWABLE;
+ __DRIcontext *dri_context;
+
+ dri_context = draw->vtable->get_dri_context(draw);
+
+ /* Check we have the right attachments */
+ if (!draw->have_back || draw->is_pixmap)
+ return;
+
+ if (flush)
+ flags |= __DRI2_FLUSH_CONTEXT;
+ loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER);
+
+ back = dri3_back_buffer(draw);
+ y = draw->height - y - height;
+
+ if (draw->is_different_gpu && draw->vtable->in_current_context(draw)) {
+ /* Update the linear buffer part of the back buffer
+ * for the dri3_copy_area operation
+ */
+ draw->ext->image->blitImage(dri_context,
+ back->linear_buffer,
+ back->image,
+ 0, 0, back->width,
+ back->height,
+ 0, 0, back->width,
+ back->height, __BLIT_FLAG_FLUSH);
+ /* We use blitImage to update our fake front,
+ */
+ if (draw->have_fake_front)
+ draw->ext->image->blitImage(dri_context,
+ dri3_fake_front_buffer(draw)->image,
+ back->image,
+ x, y, width, height,
+ x, y, width, height, __BLIT_FLAG_FLUSH);
+ }
+
+ dri3_fence_reset(draw->conn, back);
+ dri3_copy_area(draw->conn,
+ dri3_back_buffer(draw)->pixmap,
+ draw->drawable,
+ dri3_drawable_gc(draw),
+ x, y, x, y, width, height);
+ dri3_fence_trigger(draw->conn, back);
+ /* Refresh the fake front (if present) after we just damaged the real
+ * front.
+ */
+ if (draw->have_fake_front && !draw->is_different_gpu) {
+ dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw));
+ dri3_copy_area(draw->conn,
+ dri3_back_buffer(draw)->pixmap,
+ dri3_fake_front_buffer(draw)->pixmap,
+ dri3_drawable_gc(draw),
+ x, y, x, y, width, height);
+ dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw));
+ dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw));
+ }
+ dri3_fence_await(draw->conn, back);
+}
+
+void
+loader_dri3_copy_drawable(struct loader_dri3_drawable *draw,
+ xcb_drawable_t dest,
+ xcb_drawable_t src)
+{
+ loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, 0);
+
+ dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw));
+ dri3_copy_area(draw->conn,
+ src, dest,
+ dri3_drawable_gc(draw),
+ 0, 0, 0, 0, draw->width, draw->height);
+ dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw));
+ dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw));
+}
+
+void
+loader_dri3_wait_x(struct loader_dri3_drawable *draw)
+{
+ struct loader_dri3_buffer *front;
+ __DRIcontext *dri_context;
+
+ if (draw == NULL || !draw->have_fake_front)
+ return;
+
+ front = dri3_fake_front_buffer(draw);
+ dri_context = draw->vtable->get_dri_context(draw);
+
+ loader_dri3_copy_drawable(draw, front->pixmap, draw->drawable);
+
+ /* In the psc->is_different_gpu case, the linear buffer has been updated,
+ * but not yet the tiled buffer.
+ * Copy back to the tiled buffer we use for rendering.
+ * Note that we don't need flushing.
+ */
+ if (draw->is_different_gpu && draw->vtable->in_current_context(draw))
+ draw->ext->image->blitImage(dri_context,
+ front->image,
+ front->linear_buffer,
+ 0, 0, front->width,
+ front->height,
+ 0, 0, front->width,
+ front->height, 0);
+}
+
+/** loader_dri3_wait_gl
+ *
+ * Propagate GL rendering done to the fake front buffer out to the real
+ * front (the X drawable). Inverse of loader_dri3_wait_x(). No-op when
+ * there is no fake front.
+ */
+void
+loader_dri3_wait_gl(struct loader_dri3_drawable *draw)
+{
+ struct loader_dri3_buffer *front;
+ __DRIcontext *dri_context;
+
+ if (draw == NULL || !draw->have_fake_front)
+ return;
+
+ front = dri3_fake_front_buffer(draw);
+ dri_context = draw->vtable->get_dri_context(draw);
+
+ /* In the psc->is_different_gpu case, we update the linear_buffer
+ * before updating the real front.
+ */
+ if (draw->is_different_gpu && draw->vtable->in_current_context(draw))
+ draw->ext->image->blitImage(dri_context,
+ front->linear_buffer,
+ front->image,
+ 0, 0, front->width,
+ front->height,
+ 0, 0, front->width,
+ front->height, __BLIT_FLAG_FLUSH);
+ loader_dri3_copy_drawable(draw, draw->drawable, front->pixmap);
+}
+
+/** dri3_flush_present_events
+ *
+ * Process any present events that have been received from the X server.
+ * Non-blocking: drains the special event queue with
+ * xcb_poll_for_special_event() and dispatches each event to
+ * dri3_handle_present_event().
+ */
+static void
+dri3_flush_present_events(struct loader_dri3_drawable *draw)
+{
+ /* Check to see if any configuration changes have occurred
+ * since we were last invoked
+ */
+ if (draw->special_event) {
+ xcb_generic_event_t *ev;
+
+ while ((ev = xcb_poll_for_special_event(draw->conn,
+ draw->special_event)) != NULL) {
+ xcb_present_generic_event_t *ge = (void *) ev;
+ dri3_handle_present_event(draw, ge);
+ }
+ }
+}
+
+/** loader_dri3_swap_buffers_msc
+ *
+ * Make the current back buffer visible using the present extension.
+ *
+ * Returns the swap-buffer count (send_sbc) of the queued present request,
+ * or 0 when nothing was presented (no back buffer, or the drawable is a
+ * pixmap). target_msc/divisor/remainder follow GLX_OML_sync_control
+ * semantics; all-zero means plain glXSwapBuffers() behaviour.
+ */
+int64_t
+loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
+ int64_t target_msc, int64_t divisor,
+ int64_t remainder, unsigned flush_flags,
+ bool force_copy)
+{
+ struct loader_dri3_buffer *back;
+ __DRIcontext *dri_context;
+ int64_t ret = 0;
+ uint32_t options = XCB_PRESENT_OPTION_NONE;
+ int swap_interval;
+
+ dri_context = draw->vtable->get_dri_context(draw);
+ swap_interval = draw->vtable->get_swap_interval(draw);
+
+ draw->vtable->flush_drawable(draw, flush_flags);
+
+ back = draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)];
+ if (draw->is_different_gpu && back) {
+ /* Update the linear buffer before presenting the pixmap */
+ draw->ext->image->blitImage(dri_context,
+ back->linear_buffer,
+ back->image,
+ 0, 0, back->width,
+ back->height,
+ 0, 0, back->width,
+ back->height, __BLIT_FLAG_FLUSH);
+ /* Update the fake front */
+ if (draw->have_fake_front)
+ draw->ext->image->blitImage(dri_context,
+ draw->buffers[LOADER_DRI3_FRONT_ID]->image,
+ back->image,
+ 0, 0, draw->width, draw->height,
+ 0, 0, draw->width, draw->height,
+ __BLIT_FLAG_FLUSH);
+ }
+
+ /* Drain pending Present events before queuing the next request. */
+ dri3_flush_present_events(draw);
+
+ if (back && !draw->is_pixmap) {
+ dri3_fence_reset(draw->conn, back);
+
+ /* Compute when we want the frame shown by taking the last known
+ * successful MSC and adding in a swap interval for each outstanding swap
+ * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers()
+ * semantic"
+ */
+ ++draw->send_sbc;
+ if (target_msc == 0 && divisor == 0 && remainder == 0)
+ target_msc = draw->msc + swap_interval *
+ (draw->send_sbc - draw->recv_sbc);
+ else if (divisor == 0 && remainder > 0) {
+ /* From the GLX_OML_sync_control spec:
+ * "If <divisor> = 0, the swap will occur when MSC becomes
+ * greater than or equal to <target_msc>."
+ *
+ * Note that there's no mention of the remainder. The Present
+ * extension throws BadValue for remainder != 0 with divisor == 0, so
+ * just drop the passed in value.
+ */
+ remainder = 0;
+ }
+
+ /* From the GLX_EXT_swap_control spec
+ * and the EGL 1.4 spec (page 53):
+ *
+ * "If <interval> is set to a value of 0, buffer swaps are not
+ * synchronized to a video frame."
+ *
+ * Implementation note: It is possible to enable triple buffering
+ * behaviour by not using XCB_PRESENT_OPTION_ASYNC, but this should not be
+ * the default.
+ */
+ if (swap_interval == 0)
+ options |= XCB_PRESENT_OPTION_ASYNC;
+ if (force_copy)
+ options |= XCB_PRESENT_OPTION_COPY;
+
+ back->busy = 1;
+ back->last_swap = draw->send_sbc;
+ xcb_present_pixmap(draw->conn,
+ draw->drawable,
+ back->pixmap,
+ (uint32_t) draw->send_sbc,
+ 0, /* valid */
+ 0, /* update */
+ 0, /* x_off */
+ 0, /* y_off */
+ None, /* target_crtc */
+ None,
+ back->sync_fence,
+ options,
+ target_msc,
+ divisor,
+ remainder, 0, NULL);
+ ret = (int64_t) draw->send_sbc;
+
+ /* If there's a fake front, then copy the source back buffer
+ * to the fake front to keep it up to date. This needs
+ * to reset the fence and make future users block until
+ * the X server is done copying the bits
+ */
+ if (draw->have_fake_front && !draw->is_different_gpu) {
+ dri3_fence_reset(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]);
+ dri3_copy_area(draw->conn,
+ back->pixmap,
+ draw->buffers[LOADER_DRI3_FRONT_ID]->pixmap,
+ dri3_drawable_gc(draw),
+ 0, 0, 0, 0,
+ draw->width, draw->height);
+ dri3_fence_trigger(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]);
+ }
+ xcb_flush(draw->conn);
+ /* Bump the drawable stamp so the driver notices the buffer change. */
+ if (draw->stamp)
+ ++(*draw->stamp);
+ }
+
+ /* Tell the driver its cached buffer info is out of date. */
+ (draw->ext->flush->invalidate)(draw->dri_drawable);
+
+ return ret;
+}
+
+/** loader_dri3_query_buffer_age
+ *
+ * Return the age (in swaps) of the current back buffer: the number of
+ * swaps since this buffer last held the frame being built, per
+ * EGL_EXT_buffer_age semantics. Returns 0 when the buffer's contents
+ * are undefined (never swapped, or no back buffer available).
+ */
+int
+loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw)
+{
+ int back_id = LOADER_DRI3_BACK_ID(dri3_find_back(draw));
+
+ if (back_id < 0 || !draw->buffers[back_id])
+ return 0;
+
+ if (draw->buffers[back_id]->last_swap != 0)
+ return draw->send_sbc - draw->buffers[back_id]->last_swap + 1;
+ else
+ return 0;
+}
+
+/** loader_dri3_open
+ *
+ * Wrapper around xcb_dri3_open.
+ *
+ * Returns a DRM device file descriptor (with FD_CLOEXEC set) obtained
+ * from the X server, or -1 on failure. The caller owns the fd and must
+ * close it.
+ */
+int
+loader_dri3_open(xcb_connection_t *conn,
+ xcb_window_t root,
+ uint32_t provider)
+{
+ xcb_dri3_open_cookie_t cookie;
+ xcb_dri3_open_reply_t *reply;
+ int fd;
+
+ cookie = xcb_dri3_open(conn,
+ root,
+ provider);
+
+ reply = xcb_dri3_open_reply(conn, cookie, NULL);
+ if (!reply)
+ return -1;
+
+ /* DRI3Open is specified to return exactly one fd. */
+ if (reply->nfd != 1) {
+ free(reply);
+ return -1;
+ }
+
+ fd = xcb_dri3_open_reply_fds(conn, reply)[0];
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+
+ return fd;
+}
+
+/* Bytes per pixel for a __DRI_IMAGE_FORMAT code; 0 for unknown or
+ * unsupported formats (callers treat 0 as an allocation failure).
+ */
+static uint32_t
+dri3_cpp_for_format(uint32_t format) {
+ switch (format) {
+ case __DRI_IMAGE_FORMAT_R8:
+ return 1;
+ case __DRI_IMAGE_FORMAT_RGB565:
+ case __DRI_IMAGE_FORMAT_GR88:
+ return 2;
+ case __DRI_IMAGE_FORMAT_XRGB8888:
+ case __DRI_IMAGE_FORMAT_ARGB8888:
+ case __DRI_IMAGE_FORMAT_ABGR8888:
+ case __DRI_IMAGE_FORMAT_XBGR8888:
+ case __DRI_IMAGE_FORMAT_XRGB2101010:
+ case __DRI_IMAGE_FORMAT_ARGB2101010:
+ case __DRI_IMAGE_FORMAT_SARGB8:
+ return 4;
+ case __DRI_IMAGE_FORMAT_NONE:
+ default:
+ return 0;
+ }
+}
+
+/** dri3_alloc_render_buffer
+ *
+ * Use the driver createImage function to construct a __DRIimage, then
+ * get a file descriptor for that and create an X pixmap from that
+ *
+ * Allocate an xshmfence for synchronization
+ *
+ * Returns NULL on any failure; all partially-acquired resources are
+ * released via the goto-cleanup chain at the bottom.
+ */
+static struct loader_dri3_buffer *
+dri3_alloc_render_buffer(struct loader_dri3_drawable *draw, unsigned int format,
+ int width, int height, int depth)
+{
+ struct loader_dri3_buffer *buffer;
+ __DRIimage *pixmap_buffer;
+ xcb_pixmap_t pixmap;
+ xcb_sync_fence_t sync_fence;
+ struct xshmfence *shm_fence;
+ int buffer_fd, fence_fd;
+ int stride;
+
+ /* Create an xshmfence object and
+ * prepare to send that to the X server
+ */
+
+ fence_fd = xshmfence_alloc_shm();
+ if (fence_fd < 0)
+ return NULL;
+
+ shm_fence = xshmfence_map_shm(fence_fd);
+ if (shm_fence == NULL)
+ goto no_shm_fence;
+
+ /* Allocate the image from the driver
+ */
+ buffer = calloc(1, sizeof *buffer);
+ if (!buffer)
+ goto no_buffer;
+
+ buffer->cpp = dri3_cpp_for_format(format);
+ if (!buffer->cpp)
+ goto no_image;
+
+ if (!draw->is_different_gpu) {
+ /* Single-GPU: one shareable, scanout-capable image serves as both
+ * the render target and the pixmap backing.
+ */
+ buffer->image = (draw->ext->image->createImage)(draw->dri_screen,
+ width, height,
+ format,
+ __DRI_IMAGE_USE_SHARE |
+ __DRI_IMAGE_USE_SCANOUT,
+ buffer);
+ pixmap_buffer = buffer->image;
+
+ if (!buffer->image)
+ goto no_image;
+ } else {
+ /* PRIME: render into a (possibly tiled) local image and share a
+ * separate linear image with the display GPU.
+ */
+ buffer->image = (draw->ext->image->createImage)(draw->dri_screen,
+ width, height,
+ format,
+ 0,
+ buffer);
+
+ if (!buffer->image)
+ goto no_image;
+
+ buffer->linear_buffer =
+ (draw->ext->image->createImage)(draw->dri_screen,
+ width, height, format,
+ __DRI_IMAGE_USE_SHARE |
+ __DRI_IMAGE_USE_LINEAR,
+ buffer);
+ pixmap_buffer = buffer->linear_buffer;
+
+ if (!buffer->linear_buffer)
+ goto no_linear_buffer;
+ }
+
+ /* X wants the stride, so ask the image for it
+ */
+ if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE,
+ &stride))
+ goto no_buffer_attrib;
+
+ buffer->pitch = stride;
+
+ if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD,
+ &buffer_fd))
+ goto no_buffer_attrib;
+
+ /* NOTE(review): buffer->size is passed here but is never assigned in
+ * this function (calloc leaves it 0). Confirm whether the server
+ * ignores the size argument or whether this should be stride * height.
+ */
+ xcb_dri3_pixmap_from_buffer(draw->conn,
+ (pixmap = xcb_generate_id(draw->conn)),
+ draw->drawable,
+ buffer->size,
+ width, height, buffer->pitch,
+ depth, buffer->cpp * 8,
+ buffer_fd);
+
+ xcb_dri3_fence_from_fd(draw->conn,
+ pixmap,
+ (sync_fence = xcb_generate_id(draw->conn)),
+ false,
+ fence_fd);
+
+ buffer->pixmap = pixmap;
+ buffer->own_pixmap = true;
+ buffer->sync_fence = sync_fence;
+ buffer->shm_fence = shm_fence;
+ buffer->width = width;
+ buffer->height = height;
+
+ /* Mark the buffer as idle
+ */
+ dri3_fence_set(buffer);
+
+ return buffer;
+
+no_buffer_attrib:
+ /* pixmap_buffer aliases image (single GPU) or linear_buffer (PRIME). */
+ (draw->ext->image->destroyImage)(pixmap_buffer);
+no_linear_buffer:
+ if (draw->is_different_gpu)
+ (draw->ext->image->destroyImage)(buffer->image);
+no_image:
+ free(buffer);
+no_buffer:
+ xshmfence_unmap_shm(shm_fence);
+no_shm_fence:
+ close(fence_fd);
+ return NULL;
+}
+
+/** dri3_update_drawable
+ *
+ * Called the first time we use the drawable and then
+ * after we receive present configure notify events to
+ * track the geometry of the drawable
+ *
+ * Returns true on success, false on X protocol failure. Despite the int
+ * return type, callers treat this as a boolean.
+ */
+static int
+dri3_update_drawable(__DRIdrawable *driDrawable,
+ struct loader_dri3_drawable *draw)
+{
+ if (draw->first_init) {
+ xcb_get_geometry_cookie_t geom_cookie;
+ xcb_get_geometry_reply_t *geom_reply;
+ xcb_void_cookie_t cookie;
+ xcb_generic_error_t *error;
+ xcb_present_query_capabilities_cookie_t present_capabilities_cookie;
+ xcb_present_query_capabilities_reply_t *present_capabilities_reply;
+
+ draw->first_init = false;
+
+ /* Try to select for input on the window.
+ *
+ * If the drawable is a window, this will get our events
+ * delivered.
+ *
+ * Otherwise, we'll get a BadWindow error back from this request which
+ * will let us know that the drawable is a pixmap instead.
+ */
+
+ draw->eid = xcb_generate_id(draw->conn);
+ cookie =
+ xcb_present_select_input_checked(draw->conn, draw->eid, draw->drawable,
+ XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
+ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
+ XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
+
+ present_capabilities_cookie =
+ xcb_present_query_capabilities(draw->conn, draw->drawable);
+
+ /* Create an XCB event queue to hold present events outside of the usual
+ * application event queue
+ */
+ draw->special_event = xcb_register_for_special_xge(draw->conn,
+ &xcb_present_id,
+ draw->eid,
+ draw->stamp);
+ geom_cookie = xcb_get_geometry(draw->conn, draw->drawable);
+
+ geom_reply = xcb_get_geometry_reply(draw->conn, geom_cookie, NULL);
+
+ if (!geom_reply)
+ return false;
+
+ draw->width = geom_reply->width;
+ draw->height = geom_reply->height;
+ draw->depth = geom_reply->depth;
+ draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+
+ free(geom_reply);
+
+ draw->is_pixmap = false;
+
+ /* Check to see if our select input call failed. If it failed with a
+ * BadWindow error, then assume the drawable is a pixmap. Destroy the
+ * special event queue created above and mark the drawable as a pixmap
+ */
+
+ error = xcb_request_check(draw->conn, cookie);
+
+ present_capabilities_reply =
+ xcb_present_query_capabilities_reply(draw->conn,
+ present_capabilities_cookie,
+ NULL);
+
+ if (present_capabilities_reply) {
+ draw->present_capabilities = present_capabilities_reply->capabilities;
+ free(present_capabilities_reply);
+ } else
+ draw->present_capabilities = 0;
+
+ if (error) {
+ if (error->error_code != BadWindow) {
+ free(error);
+ return false;
+ }
+ /* NOTE(review): 'error' is not freed on this BadWindow path — looks
+ * like a small leak; confirm and add free(error) here.
+ */
+ draw->is_pixmap = true;
+ xcb_unregister_for_special_event(draw->conn, draw->special_event);
+ draw->special_event = NULL;
+ }
+ }
+ dri3_flush_present_events(draw);
+ return true;
+}
+
+/* the DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while
+ * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid
+ * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and
+ * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds
+ *
+ * Returns 0 for formats with no fourcc mapping here.
+ */
+static int
+image_format_to_fourcc(int format)
+{
+
+ /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */
+ switch (format) {
+ case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888;
+ case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565;
+ case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888;
+ case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888;
+ case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888;
+ case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888;
+ }
+ return 0;
+}
+
+/** loader_dri3_create_image
+ *
+ * Wrap the fd from a DRI3BufferFromPixmap reply in a single-plane
+ * __DRIimage. Takes ownership of the reply's fd (closes it); returns
+ * NULL on failure.
+ */
+__DRIimage *
+loader_dri3_create_image(xcb_connection_t *c,
+ xcb_dri3_buffer_from_pixmap_reply_t *bp_reply,
+ unsigned int format,
+ __DRIscreen *dri_screen,
+ const __DRIimageExtension *image,
+ void *loaderPrivate)
+{
+ int *fds;
+ __DRIimage *image_planar, *ret;
+ int stride, offset;
+
+ /* Get an FD for the pixmap object
+ */
+ fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply);
+
+ stride = bp_reply->stride;
+ offset = 0;
+
+ /* createImageFromFds creates a wrapper __DRIimage structure which
+ * can deal with multiple planes for things like Yuv images. So, once
+ * we've gotten the planar wrapper, pull the single plane out of it and
+ * discard the wrapper.
+ */
+ image_planar = (image->createImageFromFds)(dri_screen,
+ bp_reply->width,
+ bp_reply->height,
+ image_format_to_fourcc(format),
+ fds, 1,
+ &stride, &offset, loaderPrivate);
+ /* The driver dups the fd internally; we always close our copy. */
+ close(fds[0]);
+ if (!image_planar)
+ return NULL;
+
+ ret = (image->fromPlanar)(image_planar, 0, loaderPrivate);
+
+ (image->destroyImage)(image_planar);
+
+ return ret;
+}
+
+/** dri3_get_pixmap_buffer
+ *
+ * Get the DRM object for a pixmap from the X server and
+ * wrap that with a __DRIimage structure using createImageFromFds
+ *
+ * The result is cached in draw->buffers[buf_id]; subsequent calls return
+ * the cached buffer. Returns NULL on failure.
+ */
+static struct loader_dri3_buffer *
+dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
+ enum loader_dri3_buffer_type buffer_type,
+ struct loader_dri3_drawable *draw)
+{
+ int buf_id = loader_dri3_pixmap_buf_id(buffer_type);
+ struct loader_dri3_buffer *buffer = draw->buffers[buf_id];
+ xcb_drawable_t pixmap;
+ xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
+ xcb_dri3_buffer_from_pixmap_reply_t *bp_reply;
+ xcb_sync_fence_t sync_fence;
+ struct xshmfence *shm_fence;
+ int fence_fd;
+
+ /* Already wrapped this pixmap — reuse the cached buffer. */
+ if (buffer)
+ return buffer;
+
+ pixmap = draw->drawable;
+
+ buffer = calloc(1, sizeof *buffer);
+ if (!buffer)
+ goto no_buffer;
+
+ fence_fd = xshmfence_alloc_shm();
+ if (fence_fd < 0)
+ goto no_fence;
+ shm_fence = xshmfence_map_shm(fence_fd);
+ if (shm_fence == NULL) {
+ close (fence_fd);
+ goto no_fence;
+ }
+
+ xcb_dri3_fence_from_fd(draw->conn,
+ pixmap,
+ (sync_fence = xcb_generate_id(draw->conn)),
+ false,
+ fence_fd);
+
+ bp_cookie = xcb_dri3_buffer_from_pixmap(draw->conn, pixmap);
+ bp_reply = xcb_dri3_buffer_from_pixmap_reply(draw->conn, bp_cookie, NULL);
+ if (!bp_reply)
+ goto no_image;
+
+ buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format,
+ draw->dri_screen, draw->ext->image,
+ buffer);
+ if (!buffer->image)
+ goto no_image;
+
+ /* The pixmap belongs to the application; don't free it on destroy. */
+ buffer->pixmap = pixmap;
+ buffer->own_pixmap = false;
+ buffer->width = bp_reply->width;
+ buffer->height = bp_reply->height;
+ buffer->buffer_type = buffer_type;
+ buffer->shm_fence = shm_fence;
+ buffer->sync_fence = sync_fence;
+
+ draw->buffers[buf_id] = buffer;
+
+ free(bp_reply);
+
+ return buffer;
+
+no_image:
+ free(bp_reply);
+ xcb_sync_destroy_fence(draw->conn, sync_fence);
+ xshmfence_unmap_shm(shm_fence);
+no_fence:
+ free(buffer);
+no_buffer:
+ return NULL;
+}
+
+/** dri3_get_buffer
+ *
+ * Find a front or back buffer, allocating new ones as necessary
+ *
+ * On a size change the old contents are preserved by copying or blitting
+ * into the newly-allocated buffer before the old one is freed. Waits on
+ * the buffer's fence before returning, so the result is safe to render
+ * into. Returns NULL on allocation failure.
+ */
+static struct loader_dri3_buffer *
+dri3_get_buffer(__DRIdrawable *driDrawable,
+ unsigned int format,
+ enum loader_dri3_buffer_type buffer_type,
+ struct loader_dri3_drawable *draw)
+{
+ struct loader_dri3_buffer *buffer;
+ int buf_id;
+ __DRIcontext *dri_context;
+
+ dri_context = draw->vtable->get_dri_context(draw);
+
+ if (buffer_type == loader_dri3_buffer_back) {
+ buf_id = dri3_find_back(draw);
+
+ if (buf_id < 0)
+ return NULL;
+ } else {
+ buf_id = LOADER_DRI3_FRONT_ID;
+ }
+
+ buffer = draw->buffers[buf_id];
+
+ /* Allocate a new buffer if there isn't an old one, or if that
+ * old one is the wrong size
+ */
+ if (!buffer || buffer->width != draw->width ||
+ buffer->height != draw->height) {
+ struct loader_dri3_buffer *new_buffer;
+
+ /* Allocate the new buffers
+ */
+ new_buffer = dri3_alloc_render_buffer(draw,
+ format,
+ draw->width,
+ draw->height,
+ draw->depth);
+ if (!new_buffer)
+ return NULL;
+
+ /* When resizing, copy the contents of the old buffer, waiting for that
+ * copy to complete using our fences before proceeding
+ */
+ switch (buffer_type) {
+ case loader_dri3_buffer_back:
+ if (buffer) {
+ /* Server-side copy for shared pixmaps; client-side blit when
+ * rendering on a different GPU (linear_buffer present).
+ */
+ if (!buffer->linear_buffer) {
+ dri3_fence_reset(draw->conn, new_buffer);
+ dri3_fence_await(draw->conn, buffer);
+ dri3_copy_area(draw->conn,
+ buffer->pixmap,
+ new_buffer->pixmap,
+ dri3_drawable_gc(draw),
+ 0, 0, 0, 0,
+ draw->width, draw->height);
+ dri3_fence_trigger(draw->conn, new_buffer);
+ } else if (draw->vtable->in_current_context(draw)) {
+ draw->ext->image->blitImage(dri_context,
+ new_buffer->image,
+ buffer->image,
+ 0, 0, draw->width, draw->height,
+ 0, 0, draw->width, draw->height, 0);
+ }
+ dri3_free_render_buffer(draw, buffer);
+ }
+ break;
+ case loader_dri3_buffer_front:
+ /* Seed the new fake front from the real front (the drawable). */
+ dri3_fence_reset(draw->conn, new_buffer);
+ dri3_copy_area(draw->conn,
+ draw->drawable,
+ new_buffer->pixmap,
+ dri3_drawable_gc(draw),
+ 0, 0, 0, 0,
+ draw->width, draw->height);
+ dri3_fence_trigger(draw->conn, new_buffer);
+
+ if (new_buffer->linear_buffer &&
+ draw->vtable->in_current_context(draw)) {
+ dri3_fence_await(draw->conn, new_buffer);
+ draw->ext->image->blitImage(dri_context,
+ new_buffer->image,
+ new_buffer->linear_buffer,
+ 0, 0, draw->width, draw->height,
+ 0, 0, draw->width, draw->height, 0);
+ }
+ break;
+ }
+ buffer = new_buffer;
+ buffer->buffer_type = buffer_type;
+ draw->buffers[buf_id] = buffer;
+ }
+ /* Block until the X server is done with this buffer. */
+ dri3_fence_await(draw->conn, buffer);
+
+ /* Return the requested buffer */
+ return buffer;
+}
+
+/** dri3_free_buffers
+ *
+ * Free the front buffer or all of the back buffers. Used
+ * when the application changes which buffers it needs
+ */
+static void
+dri3_free_buffers(__DRIdrawable *driDrawable,
+ enum loader_dri3_buffer_type buffer_type,
+ struct loader_dri3_drawable *draw)
+{
+ struct loader_dri3_buffer *buffer;
+ int first_id;
+ int n_id;
+ int buf_id;
+
+ /* NOTE(review): no default case — first_id/n_id stay uninitialized for
+ * any value outside the two enum constants, and the final case has no
+ * break (harmless as the last case, but worth an explicit break).
+ */
+ switch (buffer_type) {
+ case loader_dri3_buffer_back:
+ first_id = LOADER_DRI3_BACK_ID(0);
+ n_id = LOADER_DRI3_MAX_BACK;
+ break;
+ case loader_dri3_buffer_front:
+ first_id = LOADER_DRI3_FRONT_ID;
+ n_id = 1;
+ }
+
+ for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) {
+ buffer = draw->buffers[buf_id];
+ if (buffer) {
+ dri3_free_render_buffer(draw, buffer);
+ draw->buffers[buf_id] = NULL;
+ }
+ }
+}
+
+/** loader_dri3_get_buffers
+ *
+ * The published buffer allocation API.
+ * Returns all of the necessary buffers, allocating
+ * as needed.
+ *
+ * Fills in buffers->image_mask/front/back per buffer_mask and returns
+ * true on success, false on failure (boolean despite the int type).
+ * This is the __DRIimageLoaderExtension getBuffers entry point.
+ */
+int
+loader_dri3_get_buffers(__DRIdrawable *driDrawable,
+ unsigned int format,
+ uint32_t *stamp,
+ void *loaderPrivate,
+ uint32_t buffer_mask,
+ struct __DRIimageList *buffers)
+{
+ struct loader_dri3_drawable *draw = loaderPrivate;
+ struct loader_dri3_buffer *front, *back;
+
+ buffers->image_mask = 0;
+ buffers->front = NULL;
+ buffers->back = NULL;
+
+ front = NULL;
+ back = NULL;
+
+ if (!dri3_update_drawable(driDrawable, draw))
+ return false;
+
+ /* pixmaps always have front buffers */
+ if (draw->is_pixmap)
+ buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
+
+ if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) {
+ /* All pixmaps are owned by the server gpu.
+ * When we use a different gpu, we can't use the pixmap
+ * as buffer since it is potentially tiled a way
+ * our device can't understand. In this case, use
+ * a fake front buffer. Hopefully the pixmap
+ * content will get synced with the fake front
+ * buffer.
+ */
+ if (draw->is_pixmap && !draw->is_different_gpu)
+ front = dri3_get_pixmap_buffer(driDrawable,
+ format,
+ loader_dri3_buffer_front,
+ draw);
+ else
+ front = dri3_get_buffer(driDrawable,
+ format,
+ loader_dri3_buffer_front,
+ draw);
+
+ if (!front)
+ return false;
+ } else {
+ /* Front no longer requested — release it. */
+ dri3_free_buffers(driDrawable, loader_dri3_buffer_front, draw);
+ draw->have_fake_front = 0;
+ }
+
+ if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) {
+ back = dri3_get_buffer(driDrawable,
+ format,
+ loader_dri3_buffer_back,
+ draw);
+ if (!back)
+ return false;
+ draw->have_back = 1;
+ } else {
+ dri3_free_buffers(driDrawable, loader_dri3_buffer_back, draw);
+ draw->have_back = 0;
+ }
+
+ if (front) {
+ buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT;
+ buffers->front = front->image;
+ /* A "fake" front exists whenever the front isn't the real pixmap. */
+ draw->have_fake_front = draw->is_different_gpu || !draw->is_pixmap;
+ }
+
+ if (back) {
+ buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK;
+ buffers->back = back->image;
+ }
+
+ draw->stamp = stamp;
+
+ return true;
+}
diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
new file mode 100644
index 00000000000..5b8fd1d24ca
--- /dev/null
+++ b/src/loader/loader_dri3_helper.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2013 Keith Packard
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef LOADER_DRI3_HEADER_H
+#define LOADER_DRI3_HEADER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <GL/gl.h>
+#include <GL/internal/dri_interface.h>
+
+/* Role of a loader_dri3_buffer: back (render target) or front (fake front
+ * / pixmap wrapper).
+ */
+enum loader_dri3_buffer_type {
+ loader_dri3_buffer_back = 0,
+ loader_dri3_buffer_front = 1
+};
+
+/* One DRI3 color buffer: a __DRIimage plus the X pixmap and fences that
+ * coordinate ownership between client and server.
+ */
+struct loader_dri3_buffer {
+ __DRIimage *image;
+ __DRIimage *linear_buffer; /* PRIME only: linear copy shared with the display GPU */
+ uint32_t pixmap;
+
+ /* Synchronization between the client and X server is done using an
+ * xshmfence that is mapped into an X server SyncFence. This lets the
+ * client check whether the X server is done using a buffer with a simple
+ * xshmfence call, rather than going to read X events from the wire.
+ *
+ * However, we can only wait for one xshmfence to be triggered at a time,
+ * so we need to know *which* buffer is going to be idle next. We do that
+ * by waiting for a PresentIdleNotify event. When that event arrives, the
+ * 'busy' flag gets cleared and the client knows that the fence has been
+ * triggered, and that the wait call will not block.
+ */
+
+ uint32_t sync_fence; /* XID of X SyncFence object */
+ struct xshmfence *shm_fence; /* pointer to xshmfence object */
+ bool busy; /* Set on swap, cleared on IdleNotify */
+ bool own_pixmap; /* We allocated the pixmap ID, free on destroy */
+
+ uint32_t size;
+ uint32_t pitch; /* row stride in bytes (from __DRI_IMAGE_ATTRIB_STRIDE) */
+ uint32_t cpp; /* bytes per pixel */
+ uint32_t flags;
+ uint32_t width, height;
+ uint64_t last_swap; /* send_sbc at the most recent present of this buffer */
+
+ enum loader_dri3_buffer_type buffer_type;
+};
+
+
+#define LOADER_DRI3_MAX_BACK 4
+#define LOADER_DRI3_BACK_ID(i) (i)
+#define LOADER_DRI3_FRONT_ID (LOADER_DRI3_MAX_BACK)
+
+/* Map a buffer type to its fixed slot in loader_dri3_drawable::buffers:
+ * back buffers start at slot 0, the (single) front lives after them.
+ */
+static inline int
+loader_dri3_pixmap_buf_id(enum loader_dri3_buffer_type buffer_type)
+{
+ if (buffer_type == loader_dri3_buffer_back)
+ return LOADER_DRI3_BACK_ID(0);
+ else
+ return LOADER_DRI3_FRONT_ID;
+}
+
+/* Driver-provided DRI extensions the loader needs; filled in by the
+ * platform code (GLX/EGL) before using the helper functions.
+ */
+struct loader_dri3_extensions {
+ const __DRIcoreExtension *core;
+ const __DRIimageDriverExtension *image_driver;
+ const __DRI2flushExtension *flush;
+ const __DRI2configQueryExtension *config;
+ const __DRItexBufferExtension *tex_buffer;
+ const __DRIimageExtension *image;
+};
+
+struct loader_dri3_drawable;
+
+/* Callbacks supplied by the window-system binding (GLX or EGL) so the
+ * shared DRI3 code can query swap-interval state, the current context,
+ * and drawable bookkeeping without knowing which API it serves.
+ */
+struct loader_dri3_vtable {
+ int (*get_swap_interval)(struct loader_dri3_drawable *);
+ int (*clamp_swap_interval)(struct loader_dri3_drawable *, int);
+ void (*set_swap_interval)(struct loader_dri3_drawable *, int);
+ void (*set_drawable_size)(struct loader_dri3_drawable *, int, int);
+ bool (*in_current_context)(struct loader_dri3_drawable *);
+ __DRIcontext *(*get_dri_context)(struct loader_dri3_drawable *);
+ void (*flush_drawable)(struct loader_dri3_drawable *, unsigned);
+ void (*show_fps)(struct loader_dri3_drawable *, uint64_t);
+};
+
+#define LOADER_DRI3_NUM_BUFFERS (1 + LOADER_DRI3_MAX_BACK)
+
+/* Per-drawable DRI3/Present state shared between the GLX and EGL paths. */
+struct loader_dri3_drawable {
+ xcb_connection_t *conn; /* X connection the drawable lives on */
+ __DRIdrawable *dri_drawable;
+ xcb_drawable_t drawable; /* X window or pixmap ID */
+ int width;
+ int height;
+ int depth;
+ uint8_t have_back;
+ uint8_t have_fake_front;
+ uint8_t is_pixmap; /* set when Present select-input got BadWindow */
+ uint8_t flipping;
+
+ /* Information about the GPU owning the buffer */
+ __DRIscreen *dri_screen;
+ bool is_different_gpu; /* PRIME: render GPU differs from display GPU */
+
+ /* Present extension capabilities
+ */
+ uint32_t present_capabilities;
+
+ /* SBC numbers are tracked by using the serial numbers
+ * in the present request and complete events
+ */
+ uint64_t send_sbc;
+ uint64_t recv_sbc;
+
+ /* Last received UST/MSC values for pixmap present complete */
+ uint64_t ust, msc;
+
+ /* Last received UST/MSC values from present notify msc event */
+ uint64_t notify_ust, notify_msc;
+
+ /* Serial numbers for tracking wait_for_msc events */
+ uint32_t send_msc_serial;
+ uint32_t recv_msc_serial;
+
+ /* Slot layout: backs at [0..LOADER_DRI3_MAX_BACK), front last. */
+ struct loader_dri3_buffer *buffers[LOADER_DRI3_NUM_BUFFERS];
+ int cur_back;
+ int num_back;
+
+ uint32_t *stamp; /* driver-visible invalidation counter */
+
+ xcb_present_event_t eid; /* event ID used for Present select-input */
+ xcb_gcontext_t gc;
+ xcb_special_event_t *special_event; /* private queue for Present events */
+
+ bool first_init; /* geometry/select-input not done yet */
+
+ struct loader_dri3_extensions *ext;
+ struct loader_dri3_vtable *vtable;
+};
+
+void
+loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw,
+ int interval);
+
+void
+loader_dri3_drawable_fini(struct loader_dri3_drawable *draw);
+
+int
+loader_dri3_drawable_init(xcb_connection_t *conn,
+ xcb_drawable_t drawable,
+ __DRIscreen *dri_screen,
+ bool is_different_gpu,
+ const __DRIconfig *dri_config,
+ struct loader_dri3_extensions *ext,
+ struct loader_dri3_vtable *vtable,
+ struct loader_dri3_drawable*);
+
+bool loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw,
+ int64_t target_msc,
+ int64_t divisor, int64_t remainder,
+ int64_t *ust, int64_t *msc, int64_t *sbc);
+
+int64_t
+loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
+ int64_t target_msc, int64_t divisor,
+ int64_t remainder, unsigned flush_flags,
+ bool force_copy);
+
+int
+loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw,
+ int64_t target_sbc, int64_t *ust,
+ int64_t *msc, int64_t *sbc);
+
+int loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw);
+
+void
+loader_dri3_flush(struct loader_dri3_drawable *draw,
+ unsigned flags,
+ enum __DRI2throttleReason throttle_reason);
+
+void
+loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
+ int x, int y,
+ int width, int height,
+ bool flush);
+
+void
+loader_dri3_copy_drawable(struct loader_dri3_drawable *draw,
+ xcb_drawable_t dest,
+ xcb_drawable_t src);
+
+void
+loader_dri3_wait_x(struct loader_dri3_drawable *draw);
+
+void
+loader_dri3_wait_gl(struct loader_dri3_drawable *draw);
+
+int loader_dri3_open(xcb_connection_t *conn,
+ xcb_window_t root,
+ uint32_t provider);
+
+__DRIimage *
+loader_dri3_create_image(xcb_connection_t *c,
+ xcb_dri3_buffer_from_pixmap_reply_t *bp_reply,
+ unsigned int format,
+ __DRIscreen *dri_screen,
+ const __DRIimageExtension *image,
+ void *loaderPrivate);
+
+int
+loader_dri3_get_buffers(__DRIdrawable *driDrawable,
+ unsigned int format,
+ uint32_t *stamp,
+ void *loaderPrivate,
+ uint32_t buffer_mask,
+ struct __DRIimageList *buffers);
+
+#endif
diff --git a/src/mapi/glapi/gen/EXT_gpu_shader4.xml b/src/mapi/glapi/gen/EXT_gpu_shader4.xml
index b1f7eae2610..b4120b9c192 100644
--- a/src/mapi/glapi/gen/EXT_gpu_shader4.xml
+++ b/src/mapi/glapi/gen/EXT_gpu_shader4.xml
@@ -232,7 +232,8 @@
<param name="params" type="GLuint *"/>
</function>
- <function name="BindFragDataLocationEXT" alias="BindFragDataLocation">
+ <function name="BindFragDataLocationEXT" alias="BindFragDataLocation"
+ es2="3.0">
<param name="program" type="GLuint"/>
<param name="colorNumber" type="GLuint"/>
<param name="name" type="const GLchar *"/>
diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
index 9a777a24c61..577d8254c43 100644
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -914,4 +914,30 @@
</function>
</category>
+<category name="GL_EXT_blend_func_extended" number="247">
+
+ <function name="BindFragDataLocationIndexedEXT" alias="BindFragDataLocationIndexed"
+ es2="3.0">
+ <param name="program" type="GLuint"/>
+ <param name="colorNumber" type="GLuint"/>
+ <param name="index" type="GLuint"/>
+ <param name="name" type="const GLchar *"/>
+ </function>
+
+ <function name="GetFragDataIndexEXT" alias="GetFragDataIndex"
+ es2="3.0">
+ <param name="program" type="GLuint"/>
+ <param name="name" type="const GLchar *"/>
+ <return type="GLint"/>
+ </function>
+
+ <function name="GetProgramResourceLocationIndexEXT" alias="GetProgramResourceLocationIndex"
+ es2="3.1">
+ <param name="program" type="GLuint"/>
+ <param name="programInterface" type="GLenum"/>
+ <param name="name" type="const GLchar *"/>
+ <return type="GLint"/>
+ </function>
+
+</category>
</OpenGLAPI>
diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index a9da0a21ba3..bde170fcf6f 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -102,13 +102,13 @@ fallback_required(struct gl_context *ctx, GLenum target,
*/
if (!mipmap->FBO)
_mesa_GenFramebuffers(1, &mipmap->FBO);
- _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO);
+ _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, mipmap->FBO);
- _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0);
+ _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0);
- status = _mesa_CheckFramebufferStatus(GL_FRAMEBUFFER_EXT);
+ status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
- _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, fboSave);
+ _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fboSave);
if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
@@ -131,6 +131,11 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap)
_mesa_DeleteSamplers(1, &mipmap->Sampler);
mipmap->Sampler = 0;
+ if (mipmap->FBO != 0) {
+ _mesa_DeleteFramebuffers(1, &mipmap->FBO);
+ mipmap->FBO = 0;
+ }
+
_mesa_meta_blit_shader_table_cleanup(&mipmap->shaders);
}
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index b8ab480ddfe..a8f7c9b854b 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -59,6 +59,9 @@ extern char *program_invocation_name, *program_invocation_short_name;
#elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100)
# include <stdlib.h>
# define GET_PROGRAM_NAME() getprogname()
+#elif defined(__DragonFly__)
+# include <stdlib.h>
+# define GET_PROGRAM_NAME() getprogname()
#elif defined(__APPLE__)
# include <stdlib.h>
# define GET_PROGRAM_NAME() getprogname()
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 7fa4ce87f18..b8990cef89e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst,
unsigned msg_length)
{
fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf),
- fs_reg(0u), fs_reg(0u));
+ brw_imm_ud(0u), brw_imm_ud(0u));
inst->base_mrf = base_mrf;
inst->mlen = msg_length;
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 40ad14402a7..73ba85e2a61 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -224,7 +224,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
vec1(t_nopersp),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
- brw_MOV(p, t_nopersp, brw_imm_vf4(1, 0, 0, 0));
+ brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),
+ brw_float_to_vf(0.0),
+ brw_float_to_vf(0.0),
+ brw_float_to_vf(0.0)));
brw_ENDIF(p);
/* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index cd78af0dce4..e49994f19a8 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -23,6 +23,7 @@
#pragma once
+#include <stdio.h>
#include "brw_device_info.h"
#include "main/mtypes.h"
@@ -89,8 +90,7 @@ struct brw_compiler {
void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
- bool scalar_vs;
- bool scalar_gs;
+ bool scalar_stage[MESA_SHADER_STAGES];
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
};
@@ -493,6 +493,34 @@ enum shader_dispatch_mode {
DISPATCH_MODE_SIMD8 = 3,
};
+/**
+ * @defgroup Tessellator parameter enumerations.
+ *
+ * These correspond to the hardware values in 3DSTATE_TE, and are provided
+ * as part of the tessellation evaluation shader.
+ *
+ * @{
+ */
+enum brw_tess_partitioning {
+ BRW_TESS_PARTITIONING_INTEGER = 0,
+ BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1,
+ BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
+};
+
+enum brw_tess_output_topology {
+ BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0,
+ BRW_TESS_OUTPUT_TOPOLOGY_LINE = 1,
+ BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2,
+ BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
+};
+
+enum brw_tess_domain {
+ BRW_TESS_DOMAIN_QUAD = 0,
+ BRW_TESS_DOMAIN_TRI = 1,
+ BRW_TESS_DOMAIN_ISOLINE = 2,
+};
+/** @} */
+
struct brw_vue_prog_data {
struct brw_stage_prog_data base;
struct brw_vue_map vue_map;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ac6045dbba9..2ea0a9eca92 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -322,64 +322,82 @@ static void
brw_initialize_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+
+ const bool stage_exists[MESA_SHADER_STAGES] = {
+ [MESA_SHADER_VERTEX] = true,
+ [MESA_SHADER_TESS_CTRL] = false,
+ [MESA_SHADER_TESS_EVAL] = false,
+ [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
+ [MESA_SHADER_FRAGMENT] = true,
+ [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
+ };
+
+ unsigned num_stages = 0;
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (stage_exists[i])
+ num_stages++;
+ }
unsigned max_samplers =
brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
+ ctx->Const.MaxDualSourceDrawBuffers = 1;
+ ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
+ ctx->Const.MaxCombinedShaderOutputResources =
+ MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+
ctx->Const.QueryCounterBits.Timestamp = 36;
+ ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+ ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+ ctx->Const.MaxRenderbufferSize = 8192;
+ ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+ ctx->Const.Max3DTextureLevels = 12; /* 2048 */
+ ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+ ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
+ ctx->Const.MaxTextureMbytes = 1536;
+ ctx->Const.MaxTextureRectSize = 1 << 12;
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.StripTextureBorder = true;
+ if (brw->gen >= 7)
+ ctx->Const.MaxProgramTextureGatherComponents = 4;
+ else if (brw->gen == 6)
+ ctx->Const.MaxProgramTextureGatherComponents = 1;
ctx->Const.MaxUniformBlockSize = 65536;
+
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_program_constants *prog = &ctx->Const.Program[i];
+
+ if (!stage_exists[i])
+ continue;
+
+ prog->MaxTextureImageUnits = max_samplers;
+
prog->MaxUniformBlocks = BRW_MAX_UBO;
prog->MaxCombinedUniformComponents =
prog->MaxUniformComponents +
ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+
+ prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ prog->MaxAtomicBuffers = BRW_MAX_ABO;
+ prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
+ prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
}
- ctx->Const.MaxDualSourceDrawBuffers = 1;
- ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
- ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits =
MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
- if (brw->gen >= 6)
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
- else
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
- if (_mesa_extension_override_enables.ARB_compute_shader) {
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
- ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
- } else {
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
- }
- ctx->Const.MaxCombinedTextureImageUnits =
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
-
- ctx->Const.MaxTextureLevels = 14; /* 8192 */
- if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
- ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
- ctx->Const.Max3DTextureLevels = 12; /* 2048 */
- ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
- ctx->Const.MaxTextureMbytes = 1536;
-
- if (brw->gen >= 7)
- ctx->Const.MaxArrayTextureLayers = 2048;
- else
- ctx->Const.MaxArrayTextureLayers = 512;
- ctx->Const.MaxTextureRectSize = 1 << 12;
+ ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
+ ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
+ ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
+ ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
+ ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
+ ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
+ ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
- ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-
- ctx->Const.MaxRenderbufferSize = 8192;
/* Hardware only supports a limited number of transform feedback buffers.
* So we need to override the Mesa default (which is based only on software
@@ -427,6 +445,7 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.MaxColorTextureSamples = max_samples;
ctx->Const.MaxDepthTextureSamples = max_samples;
ctx->Const.MaxIntegerSamples = max_samples;
+ ctx->Const.MaxImageSamples = 0;
/* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
* to map indices of rectangular grid to sample numbers within a pixel.
@@ -436,11 +455,6 @@ brw_initialize_context_constants(struct brw_context *brw)
*/
gen6_set_sample_maps(ctx);
- if (brw->gen >= 7)
- ctx->Const.MaxProgramTextureGatherComponents = 4;
- else if (brw->gen == 6)
- ctx->Const.MaxProgramTextureGatherComponents = 1;
-
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
if (brw->gen >= 6) {
@@ -511,30 +525,6 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
- if (brw->gen >= 7) {
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
-
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
- BRW_MAX_IMAGES;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
- (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
- BRW_MAX_IMAGES;
- ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
- ctx->Const.MaxCombinedShaderOutputResources =
- MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
- ctx->Const.MaxImageSamples = 0;
- ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
- }
-
/* Gen6 converts quads to polygon in beginning of 3D pipeline,
* but we're not sure how it's actually done for vertex order,
* that affect provoking vertex decision. Always use last vertex
@@ -586,21 +576,6 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.TextureBufferOffsetAlignment = 16;
ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
- /* FIXME: Tessellation stages are not yet supported in i965, so
- * MaxCombinedShaderStorageBlocks doesn't take them into account.
- */
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
- ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
- ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
- ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
- ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;
-
- if (_mesa_extension_override_enables.ARB_compute_shader)
- ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;
-
if (brw->gen >= 6) {
ctx->Const.MaxVarying = 32;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4b2db61c758..fe45edb89ff 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -523,6 +523,8 @@ struct brw_tracked_state {
enum shader_time_shader_type {
ST_NONE,
ST_VS,
+ ST_TCS,
+ ST_TES,
ST_GS,
ST_FS8,
ST_FS16,
@@ -1465,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw,
/* brw_surface_formats.c */
bool brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb);
+bool brw_losslessly_compressible_format(struct brw_context *brw,
+ uint32_t brw_format);
uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
mesa_format format);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3ad90da8b2f..36d9f716e03 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1291,6 +1291,16 @@ enum opcode {
* Calculate the high 32-bits of a 32x32 multiply.
*/
SHADER_OPCODE_MULH,
+
+ /**
+ * A MOV that uses VxH indirect addressing.
+ *
+ * Source 0: A register to start from (HW_REG).
+ * Source 1: An indirect offset (in bytes, UD GRF).
+ * Source 2: The length of the region that could be accessed (in bytes,
+ * UD immediate).
+ */
+ SHADER_OPCODE_MOV_INDIRECT,
};
enum brw_urb_write_flags {
@@ -1930,8 +1940,14 @@ enum brw_message_target {
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
+ *
+ * Identical for VS, DS, and HS.
*/
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64)
+#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64)
+
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 250d4097e38..419168966de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -43,6 +43,7 @@
#include "brw_wm.h"
#include "brw_fs.h"
#include "brw_cs.h"
+#include "brw_nir.h"
#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
@@ -186,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
* the redundant ones.
*/
fs_reg vec4_offset = vgrf(glsl_type::int_type);
- bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
+ bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3));
int scale = 1;
if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
@@ -374,54 +375,6 @@ fs_reg::fs_reg()
this->file = BAD_FILE;
}
-/** Immediate value constructor. */
-fs_reg::fs_reg(float f)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_F;
- this->stride = 0;
- this->f = f;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(int32_t i)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_D;
- this->stride = 0;
- this->d = i;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(uint32_t u)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_UD;
- this->stride = 0;
- this->ud = u;
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf[4])
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->ud, vf, sizeof(unsigned));
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
-}
-
fs_reg::fs_reg(struct brw_reg reg) :
backend_reg(reg)
{
@@ -591,7 +544,7 @@ fs_visitor::emit_shader_time_end()
fs_reg reset = shader_end_time;
reset.set_smear(2);
set_condmod(BRW_CONDITIONAL_Z,
- ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u)));
+ ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
ibld.IF(BRW_PREDICATE_NORMAL);
fs_reg start = shader_start_time;
@@ -606,11 +559,11 @@ fs_visitor::emit_shader_time_end()
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- cbld.ADD(diff, diff, fs_reg(-2u));
+ cbld.ADD(diff, diff, brw_imm_ud(-2u));
SHADER_TIME_ADD(cbld, 0, diff);
- SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ELSE);
- SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
@@ -620,7 +573,7 @@ fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
fs_reg value)
{
int index = shader_time_index * 3 + shader_time_subindex;
- fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
+ struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE);
fs_reg payload;
if (dispatch_width == 8)
@@ -841,6 +794,34 @@ fs_inst::regs_read(int arg) const
case SHADER_OPCODE_BARRIER:
return 1;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ if (arg == 0) {
+ assert(src[2].file == IMM);
+ unsigned region_length = src[2].ud;
+
+ if (src[0].file == FIXED_GRF) {
+ /* If the start of the region is not register aligned, then
+ * there's some portion of the register that's technically
+ * unread at the beginning.
+ *
+ * However, the register allocator works in terms of whole
+ * registers, and does not use subnr. It assumes that the
+ * read starts at the beginning of the register, and extends
+ * regs_read() whole registers beyond that.
+ *
+ * To compensate, we extend the region length to include this
+ * unread portion at the beginning.
+ */
+ if (src[0].subnr)
+ region_length += src[0].subnr * type_sz(src[0].type);
+
+ return DIV_ROUND_UP(region_length, REG_SIZE);
+ } else {
+ assert(!"Invalid register file");
+ }
+ }
+ break;
+
default:
if (is_tex() && arg == 0 && src[0].file == VGRF)
return mlen;
@@ -1005,7 +986,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
if (pixel_center_integer) {
bld.MOV(wpos, this->pixel_x);
} else {
- bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
+ bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f));
}
wpos = offset(wpos, bld, 1);
@@ -1021,7 +1002,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
offset += key->drawable_height - 1.0f;
}
- bld.ADD(wpos, pixel_y, fs_reg(offset));
+ bld.ADD(wpos, pixel_y, brw_imm_f(offset));
}
wpos = offset(wpos, bld, 1);
@@ -1198,7 +1179,7 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
g0.negate = true;
- bld.ASR(*reg, g0, fs_reg(15));
+ bld.ASR(*reg, g0, brw_imm_d(15));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
* a boolean result from this (1/true or 0/false).
@@ -1213,7 +1194,7 @@ fs_visitor::emit_frontfacing_interpolation()
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
g1_6.negate = true;
- bld.ASR(*reg, g1_6, fs_reg(31));
+ bld.ASR(*reg, g1_6, brw_imm_d(31));
}
return reg;
@@ -1230,7 +1211,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
/* Convert int_sample_pos to floating point */
bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
- bld.MUL(dst, dst, fs_reg(1 / 16.0f));
+ bld.MUL(dst, dst, brw_imm_f(1 / 16.0f));
}
else {
/* From ARB_sample_shading specification:
@@ -1238,7 +1219,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
* rasterization is disabled, gl_SamplePosition will always be
* (0.5, 0.5).
*/
- bld.MOV(dst, fs_reg(0.5f));
+ bld.MOV(dst, brw_imm_f(0.5f));
}
}
@@ -1333,8 +1314,8 @@ fs_visitor::emit_sampleid_setup()
abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(sspi_mask));
- abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
+ brw_imm_ud(sspi_mask));
+ abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5));
/* This works for both SIMD8 and SIMD16 */
abld.exec_all().group(4, 0)
@@ -1349,7 +1330,7 @@ fs_visitor::emit_sampleid_setup()
* "When rendering to a non-multisample buffer, or if multisample
* rasterization is disabled, gl_SampleID will always be zero."
*/
- abld.MOV(*reg, fs_reg(0));
+ abld.MOV(*reg, brw_imm_d(0));
}
return reg;
@@ -1662,24 +1643,7 @@ fs_visitor::assign_gs_urb_setup()
first_non_payload_grf +=
8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
- const unsigned first_icp_handle = payload.num_regs -
- (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
-
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- /* Lower URB_READ_SIMD8 opcodes into real messages. */
- if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
- assert(inst->src[0].file == IMM);
- inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
- inst->src[0].ud,
- 0), BRW_REGISTER_TYPE_UD);
- /* for now, assume constant - we can do per-slot offsets later */
- assert(inst->src[1].file == IMM);
- inst->offset = inst->src[1].ud;
- inst->src[1] = fs_reg();
- inst->mlen = 1;
- inst->base_mrf = -1;
- }
-
/* Rewrite all ATTR file references to GRFs. */
convert_attr_sources_to_hw_regs(inst);
}
@@ -2037,16 +2001,16 @@ fs_visitor::demote_pull_constants()
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- fs_reg(index),
+ brw_imm_ud(index),
*inst->src[i].reladdr,
pull_index);
inst->src[i].reladdr = NULL;
inst->src[i].stride = 1;
} else {
const fs_builder ubld = ibld.exec_all().group(8, 0);
- fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
+ struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, fs_reg(index), offset);
+ dst, brw_imm_ud(index), offset);
inst->src[i].set_smear(pull_index & 3);
}
brw_mark_surface_used(prog_data, index);
@@ -2738,7 +2702,7 @@ fs_visitor::eliminate_find_live_channel()
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = fs_reg(0u);
+ inst->src[0] = brw_imm_ud(0u);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
@@ -3591,6 +3555,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
assert(devinfo->gen >= 9);
assert(bld.dispatch_width() != 16);
+ /* XXX: src_stencil is only available on gen9+. dst_depth is never
+ * available on gen9+. As such it's impossible to have both enabled at the
+ * same time and therefore length cannot overrun the array.
+ */
+ assert(length < 15);
+
sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.exec_all().annotate("FB write OS")
.emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
@@ -3660,7 +3630,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
(has_lod || shadow_c.file != BAD_FILE ||
(op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
for (unsigned i = coord_components; i < 3; i++)
- bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
+ bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f));
msg_end = offset(msg_end, bld, 3 - coord_components);
}
@@ -3717,7 +3687,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
- bld.MOV(msg_end, fs_reg(0.0f));
+ bld.MOV(msg_end, brw_imm_f(0.0f));
msg_end = offset(msg_end, bld, 1);
}
@@ -3813,7 +3783,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
case SHADER_OPCODE_TXF_CMS:
msg_lod = offset(msg_coords, bld, 3);
/* lod */
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
/* sample index */
bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
msg_end = offset(msg_lod, bld, 2);
@@ -3896,7 +3866,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
if (bld.shader->stage != MESA_SHADER_FRAGMENT &&
op == SHADER_OPCODE_TEX) {
op = SHADER_OPCODE_TXL;
- lod = fs_reg(0.0f);
+ lod = brw_imm_f(0.0f);
}
/* Set up the LOD info */
@@ -4110,7 +4080,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
{
fs_builder ubld = bld.exec_all().group(8, 0);
const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
- ubld.MOV(dst, fs_reg(0));
+ ubld.MOV(dst, brw_imm_d(0));
ubld.MOV(component(dst, 7), sample_mask);
return dst;
}
@@ -4252,7 +4222,7 @@ fs_visitor::lower_logical_sends()
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_READ,
- fs_reg(0xffff));
+ brw_imm_d(0xffff));
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
@@ -4677,6 +4647,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case IMM:
unreachable("not reached");
}
+ if (inst->dst.stride != 1)
+ fprintf(file, "<%u>", inst->dst.stride);
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
for (int i = 0; i < inst->sources; i++) {
@@ -4764,6 +4736,16 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "|");
if (inst->src[i].file != IMM) {
+ unsigned stride;
+ if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
+ unsigned hstride = inst->src[i].hstride;
+ stride = (hstride == 0 ? 0 : (1 << (hstride - 1)));
+ } else {
+ stride = inst->src[i].stride;
+ }
+ if (stride != 1)
+ fprintf(file, "<%u>", stride);
+
fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type));
}
@@ -5241,7 +5223,7 @@ fs_visitor::run_gs()
*/
if (gs_compile->control_data_header_size_bits <= 32) {
const fs_builder abld = bld.annotate("initialize control data bits");
- abld.MOV(this->control_data_bits, fs_reg(0u));
+ abld.MOV(this->control_data_bits, brw_imm_ud(0u));
}
}
@@ -5474,13 +5456,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
struct gl_program *prog,
int shader_time_index8, int shader_time_index16,
bool use_rep_send,
unsigned *final_assembly_size,
char **error_str)
{
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ true);
+ shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
/* key->alpha_test_func means simulating alpha testing via discards,
* so the shader definitely kills pixels.
*/
@@ -5633,11 +5620,16 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_cs_prog_key *key,
struct brw_cs_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str)
{
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ true);
+ shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
prog_data->local_size[0] = shader->info.cs.local_size[0];
prog_data->local_size[1] = shader->info.cs.local_size[1];
prog_data->local_size[2] = shader->info.cs.local_size[2];
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9b56afd292f..658608f9951 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -116,10 +116,6 @@ public:
void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
void compute_clip_distance(gl_clip_plane *clip_planes);
- uint32_t gather_channel(int orig_chan, uint32_t surface, uint32_t sampler);
- void swizzle_result(ir_texture_opcode op, int dest_components,
- fs_reg orig_val, uint32_t sampler);
-
fs_inst *get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
const fs_reg &reg);
@@ -218,8 +214,6 @@ public:
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
- fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
- bool is_rect, uint32_t sampler);
void emit_texture(ir_texture_opcode op,
const glsl_type *dest_type,
fs_reg coordinate, int components,
@@ -230,7 +224,6 @@ public:
fs_reg mcs,
int gather_component,
bool is_cube_array,
- bool is_rect,
uint32_t surface,
fs_reg surface_reg,
uint32_t sampler,
@@ -305,7 +298,8 @@ public:
unsigned stream_id);
void emit_gs_thread_end();
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
- unsigned offset, unsigned num_components);
+ const fs_reg &indirect_offset, unsigned imm_offset,
+ unsigned num_components);
void emit_cs_terminate();
fs_reg *emit_cs_local_invocation_id_setup();
fs_reg *emit_cs_work_group_id_setup();
@@ -530,6 +524,11 @@ private:
struct brw_reg offset,
struct brw_reg value);
+ void generate_mov_indirect(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg indirect_byte_offset);
+
bool patch_discard_jumps_to_fb_writes();
const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 22b2f22073f..dd3c383a17d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -225,7 +225,7 @@ namespace brw {
sample_mask_reg() const
{
if (shader->stage != MESA_SHADER_FRAGMENT) {
- return src_reg(0xffff);
+ return brw_imm_d(0xffff);
} else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) {
return brw_flag_reg(0, 1);
} else {
@@ -548,7 +548,7 @@ namespace brw {
const dst_reg x_times_one_minus_a = vgrf(dst.type);
MUL(y_times_a, y, a);
- ADD(one_minus_a, negate(a), src_reg(1.0f));
+ ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 8fdc959f992..7c01f1e3d62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -90,7 +90,8 @@ opt_cmod_propagation_local(bblock_t *block)
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
- scan_inst->dst.reg_offset != inst->src[0].reg_offset)
+ scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+ scan_inst->exec_size != inst->exec_size)
break;
/* CMP's result is the same regardless of dest type. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 0c115f50748..c3ad7ad4771 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -279,7 +279,7 @@ fs_visitor::opt_combine_constants()
imm->block->last_non_control_flow_inst()->next);
const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
- ibld.MOV(reg, fs_reg(imm->val));
+ ibld.MOV(reg, brw_imm_f(imm->val));
imm->nr = reg.nr;
imm->subreg_offset = reg.subreg_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 426ea57d8f9..62ae9abede7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode)
opcode == BRW_OPCODE_NOT);
}
+static bool
+can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
+ const brw_device_info *devinfo)
+{
+ if (stride > 4)
+ return false;
+
+ /* 3-source instructions can only be Align16, which restricts what strides
+ * they can take. They can only take a stride of 1 (the usual case), or 0
+ * with a special "repctrl" bit. But the repctrl bit doesn't work for
+ * 64-bit datatypes, so if the source type is 64-bit then only a stride of
+ * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page
+ * 944:
+ *
+ * This is applicable to 32b datatypes and 16b datatype. 64b datatypes
+ * cannot use the replicate control.
+ */
+ if (inst->is_3src()) {
+ if (type_sz(inst->src[arg].type) > 4)
+ return stride == 1;
+ else
+ return stride == 1 || stride == 0;
+ }
+
+ /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions",
+ * page 391 ("Extended Math Function"):
+ *
+ * The following restrictions apply for align1 mode: Scalar source is
+ * supported. Source and destination horizontal stride must be the
+ * same.
+ *
+ * From the Haswell PRM Volume 2b "Command Reference - Instructions", page
+ * 134 ("Extended Math Function"):
+ *
+ * Scalar source is supported. Source and destination horizontal stride
+ * must be 1.
+ *
+ * and similar language exists for IVB and SNB. Pre-SNB, math instructions
+ * are sends, so the sources are moved to MRF's and there are no
+ * restrictions.
+ */
+ if (inst->is_math()) {
+ if (devinfo->gen == 6 || devinfo->gen == 7) {
+ assert(inst->dst.stride == 1);
+ return stride == 1 || stride == 0;
+ } else if (devinfo->gen >= 8) {
+ return stride == inst->dst.stride || stride == 0;
+ }
+ }
+
+ return true;
+}
+
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
@@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
/* Bail if the result of composing both strides would exceed the
* hardware limit.
*/
- if (entry->src.stride * inst->src[arg].stride > 4)
+ if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride,
+ devinfo))
return false;
/* Bail if the instruction type is larger than the execution type of the
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 8c67caff6e0..3b65a382dc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
+ case SHADER_OPCODE_MOV_INDIRECT:
return true;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
@@ -209,6 +210,8 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
copy = bld.MOV(inst->dst, src);
+ copy->force_sechalf = inst->force_sechalf;
+ copy->force_writemask_all = inst->force_writemask_all;
copy->src[0].negate = negate;
}
assert(copy->regs_written == written);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 9d7fb94c397..8528f391941 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -89,39 +89,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
brw_reg.abs = reg->abs;
brw_reg.negate = reg->negate;
break;
- case IMM:
- assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
- reg->type == BRW_REGISTER_TYPE_UV ||
- reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0));
-
- switch (reg->type) {
- case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(reg->f);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(reg->d);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(reg->ud);
- break;
- case BRW_REGISTER_TYPE_W:
- brw_reg = brw_imm_w(reg->d);
- break;
- case BRW_REGISTER_TYPE_UW:
- brw_reg = brw_imm_uw(reg->ud);
- break;
- case BRW_REGISTER_TYPE_VF:
- brw_reg = brw_imm_vf(reg->ud);
- break;
- case BRW_REGISTER_TYPE_V:
- brw_reg = brw_imm_v(reg->ud);
- break;
- default:
- unreachable("not reached");
- }
- break;
case ARF:
case FIXED_GRF:
+ case IMM:
brw_reg = *static_cast<struct brw_reg *>(reg);
break;
case BAD_FILE:
@@ -372,6 +342,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
}
void
+fs_generator::generate_mov_indirect(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg indirect_byte_offset)
+{
+ assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
+ assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
+
+ unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
+
+ /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+ struct brw_reg addr = vec8(brw_address_reg(0));
+
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, we re-type to UW and use a stride.
+ */
+ indirect_byte_offset =
+ retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+ /* Prior to Broadwell, there are only 8 address registers. */
+ assert(inst->exec_size == 8 || devinfo->gen >= 8);
+
+ brw_MOV(p, addr, indirect_byte_offset);
+ brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+}
+
+void
fs_generator::generate_urb_read(fs_inst *inst,
struct brw_reg dst,
struct brw_reg header)
@@ -700,6 +700,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
break;
}
+ /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type
+ * is set as part of the message descriptor. On gen4, the PRM seems to
+ * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
+ * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is
+ * gone from the message descriptor entirely and you just get UINT32 all
+ * the time regardless. Since we can really only do non-UINT32 on gen4,
+ * just stomp it to UINT32 all the time.
+ */
+ if (inst->opcode == SHADER_OPCODE_TXS)
+ return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+
switch (inst->exec_size) {
case 8:
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
@@ -2087,6 +2098,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
fill_count++;
break;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ generate_mov_indirect(inst, dst, src[0], src[1]);
+ break;
+
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
generate_urb_read(inst, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 3a666b8debc..6b0c4a5b36e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -208,7 +208,7 @@ emit_system_values_block(nir_block *block, void *void_visitor)
const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL);
fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.SHR(iid, g1, fs_reg(27u));
+ abld.SHR(iid, g1, brw_imm_ud(27u));
*reg = iid;
}
break;
@@ -250,6 +250,57 @@ emit_system_values_block(nir_block *block, void *void_visitor)
*reg = *v->emit_cs_work_group_id_setup();
break;
+ case nir_intrinsic_load_helper_invocation:
+ assert(v->stage == MESA_SHADER_FRAGMENT);
+ reg = &v->nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION];
+ if (reg->file == BAD_FILE) {
+ const fs_builder abld =
+ v->bld.annotate("gl_HelperInvocation", NULL);
+
+ /* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the
+ * pixel mask is in g1.7 of the thread payload.
+ *
+ * We move the per-channel pixel enable bit to the low bit of each
+ * channel by shifting the byte containing the pixel mask by the
+ * vector immediate 0x76543210UV.
+ *
+ * The region of <1,8,0> reads only 1 byte (the pixel masks for
+ * subspans 0 and 1) in SIMD8 and an additional byte (the pixel
+ * masks for 2 and 3) in SIMD16.
+ */
+ fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1);
+ abld.SHR(shifted,
+ stride(byte_offset(retype(brw_vec1_grf(1, 0),
+ BRW_REGISTER_TYPE_UB), 28),
+ 1, 8, 0),
+ brw_imm_uv(0x76543210));
+
+ /* A set bit in the pixel mask means the channel is enabled, but
+ * that is the opposite of gl_HelperInvocation so we need to invert
+ * the mask.
+ *
+ * The negate source-modifier bit of logical instructions on Gen8+
+ * performs 1's complement negation, so we can use that instead of
+ * a NOT instruction.
+ */
+ fs_reg inverted = negate(shifted);
+ if (v->devinfo->gen < 8) {
+ inverted = abld.vgrf(BRW_REGISTER_TYPE_UW);
+ abld.NOT(inverted, shifted);
+ }
+
+ /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing
+ * with 1 and negating.
+ */
+ fs_reg anded = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.AND(anded, inverted, brw_imm_uw(1));
+
+ fs_reg dst = abld.vgrf(BRW_REGISTER_TYPE_D, 1);
+ abld.MOV(dst, negate(retype(anded, BRW_REGISTER_TYPE_D)));
+ *reg = dst;
+ }
+ break;
+
default:
break;
}
@@ -454,8 +505,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
tmp.subreg_offset = 2;
tmp.stride = 2;
- fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80));
- or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
+ bld.OR(tmp, g0, brw_imm_uw(0x3f80));
tmp.type = BRW_REGISTER_TYPE_D;
tmp.subreg_offset = 0;
@@ -479,9 +529,9 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
g1_6.negate = true;
}
- bld.OR(tmp, g1_6, fs_reg(0x3f800000));
+ bld.OR(tmp, g1_6, brw_imm_d(0x3f800000));
}
- bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000));
+ bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000));
return true;
}
@@ -594,14 +644,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
* Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
* zero.
*/
- bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
op[0].type = BRW_REGISTER_TYPE_UD;
result.type = BRW_REGISTER_TYPE_UD;
- bld.AND(result_int, op[0], fs_reg(0x80000000u));
+ bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
- inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u));
+ inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
inst->predicate = BRW_PREDICATE_NORMAL;
if (instr->dest.saturate) {
inst = bld.MOV(result, result);
@@ -615,9 +665,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
* -> non-negative val generates 0x00000000.
* Predicated OR sets 1 if val is positive.
*/
- bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G);
- bld.ASR(result, op[0], fs_reg(31));
- inst = bld.OR(result, result, fs_reg(1));
+ bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G);
+ bld.ASR(result, op[0], brw_imm_d(31));
+ inst = bld.OR(result, result, brw_imm_d(1));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
@@ -665,21 +715,21 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_fddy:
if (fs_key->high_quality_derivatives) {
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ brw_imm_d(fs_key->render_to_fbo));
} else {
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ brw_imm_d(fs_key->render_to_fbo));
}
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddy_fine:
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ brw_imm_d(fs_key->render_to_fbo));
inst->saturate = instr->dest.saturate;
break;
case nir_op_fddy_coarse:
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
- fs_reg(fs_key->render_to_fbo));
+ brw_imm_d(fs_key->render_to_fbo));
inst->saturate = instr->dest.saturate;
break;
@@ -828,10 +878,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_f2b:
- bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
+ bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
break;
case nir_op_i2b:
- bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+ bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
break;
case nir_op_ftrunc:
@@ -931,9 +981,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
- bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
- inst = bld.ADD(result, result, fs_reg(31));
+ inst = bld.ADD(result, result, brw_imm_d(31));
inst->predicate = BRW_PREDICATE_NORMAL;
inst->src[0].negate = true;
break;
@@ -986,7 +1036,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
if (optimize_frontfacing_ternary(instr, result))
return;
- bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
inst = bld.SEL(result, op[1], op[2]);
inst->predicate = BRW_PREDICATE_NORMAL;
break;
@@ -1001,7 +1051,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
if (devinfo->gen <= 5 &&
(instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
fs_reg masked = vgrf(glsl_type::int_type);
- bld.AND(masked, result, fs_reg(1));
+ bld.AND(masked, result, brw_imm_d(1));
masked.negate = true;
bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked);
}
@@ -1014,7 +1064,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
nir_ssa_values[instr->def.index] = reg;
}
@@ -1042,7 +1092,7 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect),
- fs_reg(multiplier));
+ brw_imm_d(multiplier));
}
return reg;
@@ -1108,12 +1158,12 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
*/
bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
BRW_REGISTER_TYPE_UD),
- fs_reg(size - base - 1), BRW_CONDITIONAL_L);
+ brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L);
} else {
bld.MOV(tmp, get_nir_src(deref_array->indirect));
}
- bld.MUL(tmp, tmp, fs_reg(element_size));
+ bld.MUL(tmp, tmp, brw_imm_ud(element_size));
if (image.reladdr)
bld.ADD(*image.reladdr, *image.reladdr, tmp);
else
@@ -1232,7 +1282,7 @@ intexp2(const fs_builder &bld, const fs_reg &x)
fs_reg result = bld.vgrf(x.type, 1);
fs_reg one = bld.vgrf(x.type, 1);
- bld.MOV(one, retype(fs_reg(1), one.type));
+ bld.MOV(one, retype(brw_imm_d(1), one.type));
bld.SHL(result, one, x);
return result;
}
@@ -1285,7 +1335,7 @@ fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+ abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
fs_reg mask = intexp2(abld, prev_count);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
@@ -1356,26 +1406,26 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) {
fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+ abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
unsigned log2_bits_per_vertex =
_mesa_fls(gs_compile->control_data_bits_per_vertex);
- abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex));
+ abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex));
if (per_slot_offset.file != BAD_FILE) {
/* Set the per-slot offset to dword_index / 4, so that we'll write to
* the appropriate OWord within the control data header.
*/
- abld.SHR(per_slot_offset, dword_index, fs_reg(2u));
+ abld.SHR(per_slot_offset, dword_index, brw_imm_ud(2u));
}
/* Set the channel masks to 1 << (dword_index % 4), so that we'll
* write to the appropriate DWORD within the OWORD.
*/
fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fwa_bld.AND(channel, dword_index, fs_reg(3u));
+ fwa_bld.AND(channel, dword_index, brw_imm_ud(3u));
channel_mask = intexp2(fwa_bld, channel);
/* Then the channel masks need to be in bits 23:16. */
- fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u));
+ fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u));
}
/* Store the control data bits in the message payload and send it. */
@@ -1435,11 +1485,11 @@ fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
/* reg::sid = stream_id */
fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.MOV(sid, fs_reg(stream_id));
+ abld.MOV(sid, brw_imm_ud(stream_id));
/* reg:shift_count = 2 * (vertex_count - 1) */
fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.SHL(shift_count, vertex_count, fs_reg(1u));
+ abld.SHL(shift_count, vertex_count, brw_imm_ud(1u));
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
@@ -1510,14 +1560,14 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
*/
fs_inst *inst =
abld.AND(bld.null_reg_d(), vertex_count,
- fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u));
+ brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u));
inst->conditional_mod = BRW_CONDITIONAL_Z;
abld.IF(BRW_PREDICATE_NORMAL);
/* If vertex_count is 0, then no control data bits have been
* accumulated yet, so we can skip emitting them.
*/
- abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u),
+ abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ);
abld.IF(BRW_PREDICATE_NORMAL);
emit_gs_control_data_bits(vertex_count);
@@ -1530,7 +1580,7 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
- inst = abld.MOV(this->control_data_bits, fs_reg(0u));
+ inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u));
inst->force_writemask_all = true;
abld.emit(BRW_OPCODE_ENDIF);
}
@@ -1551,42 +1601,113 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
void
fs_visitor::emit_gs_input_load(const fs_reg &dst,
const nir_src &vertex_src,
- unsigned input_offset,
+ const fs_reg &indirect_offset,
+ unsigned imm_offset,
unsigned num_components)
{
- const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data;
- const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0];
+ struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data;
- const unsigned array_stride = vue_prog_data->urb_read_length * 8;
+ /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
+ * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only
+ * gl_PointSize is available as a GS input, however, so it must be that.
+ */
+ const bool is_point_size =
+ indirect_offset.file == BAD_FILE && imm_offset == 0;
+
+ nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
+ const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
+
+ if (indirect_offset.file == BAD_FILE && vertex_const != NULL &&
+ 4 * imm_offset < push_reg_count) {
+ imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count;
+ /* This input was pushed into registers. */
+ if (is_point_size) {
+ /* gl_PointSize comes in .w */
+ bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
+ } else {
+ for (unsigned i = 0; i < num_components; i++) {
+ bld.MOV(offset(dst, bld, i),
+ fs_reg(ATTR, imm_offset + i, dst.type));
+ }
+ }
+ } else {
+ /* Resort to the pull model. Ensure the VUE handles are provided. */
+ gs_prog_data->base.include_vue_handles = true;
- const bool pushed = 4 * input_offset < array_stride;
+ unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2;
+ fs_reg icp_handle;
- if (input_offset == 0) {
- /* This is the VUE header, containing VARYING_SLOT_LAYER [.y],
- * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].
- * Only gl_PointSize is available as a GS input, so they must
- * be asking for that input.
- */
- if (pushed) {
- bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type));
+ if (vertex_const) {
+ /* The vertex index is constant; just select the proper URB handle. */
+ icp_handle =
+ retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+ BRW_REGISTER_TYPE_UD);
} else {
- fs_reg tmp = bld.vgrf(dst.type, 4);
- fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
- fs_reg(vertex), fs_reg(0));
- inst->regs_written = 4;
- bld.MOV(dst, offset(tmp, bld, 3));
+ /* The vertex index is non-constant. We need to use indirect
+ * addressing to fetch the proper URB handle.
+ *
+ * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0>
+ * indicating that channel <n> should read the handle from
+ * DWord <n>. We convert that to bytes by multiplying by 4.
+ *
+ * Next, we convert the vertex index to bytes by multiplying
+ * by 32 (shifting by 5), and add the two together. This is
+ * the final indirect byte offset.
+ */
+ fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_W, 1);
+ fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+
+ /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */
+ bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210)));
+ /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */
+ bld.SHL(channel_offsets, sequence, brw_imm_ud(2u));
+ /* Convert vertex_index to bytes (multiply by 32) */
+ bld.SHL(vertex_offset_bytes,
+ retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(5u));
+ bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
+
+ /* Use first_icp_handle as the base offset. There is one register
+ * of URB handles per vertex, so inform the register allocator that
+ * we might read up to nir->info.gs.vertices_in registers.
+ */
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
+ fs_reg(brw_vec8_grf(first_icp_handle, 0)),
+ fs_reg(icp_offset_bytes),
+ brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
}
- } else {
- if (pushed) {
- int index = vertex * array_stride + 4 * input_offset;
- for (unsigned i = 0; i < num_components; i++) {
- bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type));
- }
+
+ fs_inst *inst;
+ if (indirect_offset.file == BAD_FILE) {
+ /* Constant indexing - use global offset. */
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
+ inst->offset = imm_offset;
+ inst->base_mrf = -1;
+ inst->mlen = 1;
+ inst->regs_written = num_components;
} else {
- fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
- fs_reg(vertex), fs_reg(input_offset));
+ /* Indirect indexing - use per-slot offsets as well. */
+ const fs_reg srcs[] = { icp_handle, indirect_offset };
+ fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
+
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
+ inst->offset = imm_offset;
+ inst->base_mrf = -1;
+ inst->mlen = 2;
inst->regs_written = num_components;
}
+
+ if (is_point_size) {
+ /* Read the whole VUE header (because of alignment) and read .w. */
+ fs_reg tmp = bld.vgrf(dst.type, 4);
+ inst->dst = tmp;
+ inst->regs_written = 4;
+ bld.MOV(dst, offset(tmp, bld, 3));
+ }
}
}
@@ -1626,6 +1747,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_GEOMETRY);
+ fs_reg indirect_offset;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -1644,9 +1766,11 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
unreachable("load_input intrinsics are invalid for the GS stage");
case nir_intrinsic_load_per_vertex_input_indirect:
- assert(!"Not allowed");
+ indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D);
+ /* fallthrough */
case nir_intrinsic_load_per_vertex_input:
- emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+ emit_gs_input_load(dest, instr->src[0],
+ indirect_offset, instr->const_index[0],
instr->num_components);
break;
@@ -1703,6 +1827,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
break;
}
+ case nir_intrinsic_load_helper_invocation:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
@@ -1723,7 +1848,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
fs_inst *cmp;
if (instr->intrinsic == nir_intrinsic_discard_if) {
cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
- fs_reg(0), BRW_CONDITIONAL_Z);
+ brw_imm_d(0), BRW_CONDITIONAL_Z);
} else {
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
@@ -1771,7 +1896,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
FS_OPCODE_INTERPOLATE_AT_CENTROID,
dst_xy,
fs_reg(), /* src */
- fs_reg(0u),
+ brw_imm_ud(0u),
interpolation);
break;
@@ -1785,7 +1910,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
- fs_reg(msg_data),
+ brw_imm_ud(msg_data),
interpolation);
} else {
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
@@ -1794,7 +1919,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
if (nir_src_is_dynamically_uniform(instr->src[0])) {
const fs_reg sample_id = bld.emit_uniformize(sample_src);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
@@ -1820,7 +1946,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
sample_src, sample_id,
BRW_CONDITIONAL_EQ);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
@@ -1851,7 +1978,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
dst_xy,
fs_reg(), /* src */
- fs_reg(off_x | (off_y << 4)),
+ brw_imm_ud(off_x | (off_y << 4)),
interpolation);
} else {
fs_reg src = vgrf(glsl_type::ivec2_type);
@@ -1859,7 +1986,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
+ bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
bld.MOV(itemp, temp); /* float to int */
@@ -1879,7 +2006,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
+ bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
}
const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
@@ -1887,7 +2014,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
opcode,
dst_xy,
src,
- fs_reg(0u),
+ brw_imm_ud(0u),
interpolation);
}
break;
@@ -1947,14 +2074,14 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
cs_prog_data->uses_num_work_groups = true;
- fs_reg surf_index = fs_reg(surface);
+ fs_reg surf_index = brw_imm_ud(surface);
brw_mark_surface_used(prog_data, surface);
/* Read the 3 GLuint components of gl_NumWorkGroups */
for (unsigned i = 0; i < 3; i++) {
fs_reg read_result =
emit_untyped_read(bld, surf_index,
- fs_reg(i << 2),
+ brw_imm_ud(i << 2),
1 /* dims */, 1 /* size */,
BRW_PREDICATE_NONE);
read_result.type = dest.type;
@@ -1994,16 +2121,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
/* Emit a surface read or atomic op. */
switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_read:
- tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1);
+ tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1);
break;
case nir_intrinsic_atomic_counter_inc:
- tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+ tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
fs_reg(), 1, 1, BRW_AOP_INC);
break;
case nir_intrinsic_atomic_counter_dec:
- tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+ tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
fs_reg(), 1, 1, BRW_AOP_PREDEC);
break;
@@ -2145,14 +2272,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
for (unsigned c = 0; c < info->dest_components; ++c) {
if ((int)c >= type->coordinate_components()) {
bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
- fs_reg(1));
+ brw_imm_d(1));
} else if (c == 1 && is_1d_array_image) {
bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
offset(size, bld, 2));
} else if (c == 2 && is_cube_array_image) {
bld.emit(SHADER_OPCODE_INT_QUOTIENT,
offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
- offset(size, bld, c), fs_reg(6));
+ offset(size, bld, c), brw_imm_d(6));
} else {
bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
offset(size, bld, c));
@@ -2164,7 +2291,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_image_samples:
/* The driver does not support multi-sampled images. */
- bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
break;
case nir_intrinsic_load_uniform_indirect:
@@ -2195,7 +2322,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (const_index) {
const unsigned index = stage_prog_data->binding_table.ubo_start +
const_index->u[0];
- surf_index = fs_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
/* The block index is not a constant. Evaluate the index expression
@@ -2204,7 +2331,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
*/
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ubo_start));
+ brw_imm_ud(stage_prog_data->binding_table.ubo_start));
surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
@@ -2220,7 +2347,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg base_offset = vgrf(glsl_type::int_type);
bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_D),
- fs_reg(2));
+ brw_imm_d(2));
unsigned vec4_offset = instr->const_index[0] / 4;
for (int i = 0; i < instr->num_components; i++)
@@ -2230,7 +2357,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
- fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+ struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
surf_index, const_offset_reg);
@@ -2262,12 +2389,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
- surf_index = fs_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ssbo_start));
+ brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -2282,7 +2409,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (has_indirect) {
offset_reg = get_nir_src(instr->src[1]);
} else {
- offset_reg = fs_reg(instr->const_index[0]);
+ offset_reg = brw_imm_ud(instr->const_index[0]);
}
/* Read the vector */
@@ -2333,12 +2460,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
const_uniform_block->u[0];
- surf_index = fs_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
- fs_reg(stage_prog_data->binding_table.ssbo_start));
+ brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
@@ -2362,12 +2489,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg offset_reg;
if (!has_indirect) {
- offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
+ offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
- fs_reg(4 * first_component));
+ brw_imm_ud(4 * first_component));
}
emit_untyped_write(bld, surf_index, offset_reg,
@@ -2438,7 +2565,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
int reg_width = dispatch_width / 8;
/* Set LOD = 0 */
- fs_reg source = fs_reg(0);
+ fs_reg source = brw_imm_d(0);
int mlen = 1 * reg_width;
@@ -2457,7 +2584,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
BRW_REGISTER_TYPE_UD);
const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size,
- src_payload, fs_reg(index));
+ src_payload, brw_imm_ud(index));
inst->header_size = 0;
inst->mlen = mlen;
inst->regs_written = regs_written;
@@ -2486,12 +2613,12 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
if (const_surface) {
unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
const_surface->u[0];
- surface = fs_reg(surf_index);
+ surface = brw_imm_ud(surf_index);
brw_mark_surface_used(prog_data, surf_index);
} else {
surface = vgrf(glsl_type::uint_type);
bld.ADD(surface, get_nir_src(instr->src[0]),
- fs_reg(stage_prog_data->binding_table.ssbo_start));
+ brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
/* Assume this may touch any SSBO. This is the same we do for other
* UBO/SSBO accesses with non-constant surface.
@@ -2524,13 +2651,11 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
{
unsigned texture = instr->texture_index;
unsigned sampler = instr->sampler_index;
- fs_reg texture_reg(texture);
- fs_reg sampler_reg(sampler);
+ fs_reg texture_reg(brw_imm_ud(texture));
+ fs_reg sampler_reg(brw_imm_ud(sampler));
int gather_component = instr->component;
- bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
-
bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
instr->is_array;
@@ -2552,6 +2677,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
coordinate = retype(src, BRW_REGISTER_TYPE_D);
break;
default:
@@ -2604,7 +2730,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
/* Emit code to evaluate the actual indexing expression */
texture_reg = vgrf(glsl_type::uint_type);
- bld.ADD(texture_reg, src, fs_reg(texture));
+ bld.ADD(texture_reg, src, brw_imm_ud(texture));
texture_reg = bld.emit_uniformize(texture_reg);
break;
}
@@ -2612,7 +2738,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
case nir_tex_src_sampler_offset: {
/* Emit code to evaluate the actual indexing expression */
sampler_reg = vgrf(glsl_type::uint_type);
- bld.ADD(sampler_reg, src, fs_reg(sampler));
+ bld.ADD(sampler_reg, src, brw_imm_ud(sampler));
sampler_reg = bld.emit_uniformize(sampler_reg);
break;
}
@@ -2622,19 +2748,20 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
}
}
- if (instr->op == nir_texop_txf_ms) {
+ if (instr->op == nir_texop_txf_ms ||
+ instr->op == nir_texop_samples_identical) {
if (devinfo->gen >= 7 &&
key_tex->compressed_multisample_layout_mask & (1 << texture)) {
mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg);
} else {
- mcs = fs_reg(0u);
+ mcs = brw_imm_ud(0u);
}
}
for (unsigned i = 0; i < 3; i++) {
if (instr->const_offset[i] != 0) {
assert(offset_components == 0);
- tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3));
+ tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3));
break;
}
}
@@ -2668,6 +2795,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
inst->base_mrf = -1;
return;
}
+ case nir_texop_samples_identical: op = ir_samples_identical; break;
default:
unreachable("unknown texture opcode");
}
@@ -2675,8 +2803,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
emit_texture(op, dest_type, coordinate, instr->coord_components,
shadow_comparitor, lod, lod2, lod_components, sample_index,
tex_offset, mcs, gather_component,
- is_cube_array, is_rect,
- texture, texture_reg, sampler, sampler_reg);
+ is_cube_array, texture, texture_reg, sampler, sampler_reg);
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 534d8490cdf..45694ec0894 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -44,7 +44,7 @@ namespace brw {
*/
const fs_reg usurface = bld.emit_uniformize(surface);
const fs_reg srcs[] = {
- addr, src, usurface, fs_reg(dims), fs_reg(arg)
+ addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg)
};
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
@@ -330,7 +330,7 @@ namespace {
* messages causes a hang on IVB and VLV.
*/
set_predicate(pred,
- bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+ bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4),
BRW_CONDITIONAL_G));
return BRW_PREDICATE_NORMAL;
@@ -361,7 +361,7 @@ namespace {
*/
bld.CMP(bld.null_reg_ud(),
retype(size, BRW_REGISTER_TYPE_UD),
- fs_reg(0), BRW_CONDITIONAL_NZ);
+ brw_imm_d(0), BRW_CONDITIONAL_NZ);
return BRW_PREDICATE_NORMAL;
} else {
@@ -438,7 +438,7 @@ namespace {
* FINISHME: Factor out this frequently recurring pattern into a
* helper function.
*/
- const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) };
+ const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) };
const fs_reg dst = bld.vgrf(addr.type, dims);
bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
return dst;
@@ -488,7 +488,7 @@ namespace {
bld.ADD(offset(addr, bld, c), offset(off, bld, c),
(c < dims ?
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
- fs_reg(0)));
+ fs_reg(brw_imm_d(0))));
/* The layout of 3-D textures in memory is sort-of like a tiling
* format. At each miplevel, the slices are arranged in rows of
@@ -515,7 +515,7 @@ namespace {
/* Decompose z into a major (tmp.y) and a minor (tmp.x)
* index.
*/
- bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0),
+ bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0),
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
bld.SHR(offset(tmp, bld, 1),
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
@@ -549,7 +549,7 @@ namespace {
for (unsigned c = 0; c < 2; ++c) {
/* Calculate the minor x and y indices. */
bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
- fs_reg(0), offset(addr, bld, c));
+ brw_imm_d(0), offset(addr, bld, c));
/* Calculate the major x and y indices. */
bld.SHR(offset(major, bld, c),
@@ -595,7 +595,7 @@ namespace {
/* XOR tmp.x and tmp.y with bit 6 of the memory address. */
bld.XOR(tmp, tmp, offset(tmp, bld, 1));
- bld.AND(tmp, tmp, fs_reg(1 << 6));
+ bld.AND(tmp, tmp, brw_imm_d(1 << 6));
bld.XOR(dst, dst, tmp);
}
@@ -647,7 +647,7 @@ namespace {
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
/* Shift each component left to the correct bitfield position. */
- bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
+ bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32));
/* Add everything up. */
if (seen[shifts[c] / 32]) {
@@ -679,13 +679,13 @@ namespace {
/* Shift left to discard the most significant bits. */
bld.SHL(offset(dst, bld, c),
offset(src, bld, shifts[c] / 32),
- fs_reg(32 - shifts[c] % 32 - widths[c]));
+ brw_imm_ud(32 - shifts[c] % 32 - widths[c]));
/* Shift back to the least significant bits using an arithmetic
* shift to get sign extension on signed types.
*/
bld.ASR(offset(dst, bld, c),
- offset(dst, bld, c), fs_reg(32 - widths[c]));
+ offset(dst, bld, c), brw_imm_ud(32 - widths[c]));
}
}
@@ -709,13 +709,13 @@ namespace {
if (widths[c]) {
/* Clamp to the maximum value. */
bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
- fs_reg((int)scale(widths[c] - s)),
+ brw_imm_d((int)scale(widths[c] - s)),
BRW_CONDITIONAL_L);
/* Clamp to the minimum value. */
if (is_signed)
bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
- fs_reg(-(int)scale(widths[c] - s) - 1),
+ brw_imm_d(-(int)scale(widths[c] - s) - 1),
BRW_CONDITIONAL_GE);
}
}
@@ -741,12 +741,12 @@ namespace {
/* Divide by the normalization constants. */
bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
- fs_reg(1.0f / scale(widths[c] - s)));
+ brw_imm_f(1.0f / scale(widths[c] - s)));
/* Clamp to the minimum value. */
if (is_signed)
bld.emit_minmax(offset(dst, bld, c),
- offset(dst, bld, c), fs_reg(-1.0f),
+ offset(dst, bld, c), brw_imm_f(-1.0f),
BRW_CONDITIONAL_GE);
}
}
@@ -771,10 +771,10 @@ namespace {
/* Clamp the normalized floating-point argument. */
if (is_signed) {
bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
- fs_reg(-1.0f), BRW_CONDITIONAL_GE);
+ brw_imm_f(-1.0f), BRW_CONDITIONAL_GE);
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
- fs_reg(1.0f), BRW_CONDITIONAL_L);
+ brw_imm_f(1.0f), BRW_CONDITIONAL_L);
} else {
set_saturate(true, bld.MOV(offset(fdst, bld, c),
offset(src, bld, c)));
@@ -782,7 +782,7 @@ namespace {
/* Multiply by the normalization constants. */
bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
- fs_reg((float)scale(widths[c] - s)));
+ brw_imm_f((float)scale(widths[c] - s)));
/* Convert to integer. */
bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
@@ -814,7 +814,7 @@ namespace {
*/
if (widths[c] < 16)
bld.SHL(offset(dst, bld, c),
- offset(dst, bld, c), fs_reg(15 - widths[c]));
+ offset(dst, bld, c), brw_imm_ud(15 - widths[c]));
/* Convert to 32-bit floating point. */
bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
@@ -842,7 +842,7 @@ namespace {
/* Clamp to the minimum value. */
if (widths[c] < 16)
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
- fs_reg(0.0f), BRW_CONDITIONAL_GE);
+ brw_imm_f(0.0f), BRW_CONDITIONAL_GE);
/* Convert to 16-bit floating-point. */
bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
@@ -855,7 +855,7 @@ namespace {
*/
if (widths[c] < 16)
bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
- fs_reg(15 - widths[c]));
+ brw_imm_ud(15 - widths[c]));
}
}
@@ -874,7 +874,8 @@ namespace {
for (unsigned c = 0; c < 4; ++c)
bld.MOV(offset(dst, bld, c),
- widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
+ widths[c] ? offset(src, bld, c)
+ : fs_reg(brw_imm_ud(pad[c])));
return dst;
}
@@ -939,7 +940,7 @@ namespace brw {
/* An out of bounds surface access should give zero as result. */
for (unsigned c = 0; c < size; ++c)
set_predicate(pred, bld.SEL(offset(tmp, bld, c),
- offset(tmp, bld, c), fs_reg(0)));
+ offset(tmp, bld, c), brw_imm_d(0)));
}
/* Set the register type to D instead of UD if the data type is
@@ -1122,7 +1123,7 @@ namespace brw {
/* An unbound surface access should give zero as result. */
if (rsize)
- set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0)));
+ set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
return tmp;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2647a40c730..e82acd141f3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -79,122 +79,6 @@ fs_visitor::emit_vs_system_value(int location)
return reg;
}
-fs_reg
-fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
- bool is_rect, uint32_t sampler)
-{
- bool needs_gl_clamp = true;
- fs_reg scale_x, scale_y;
-
- /* The 965 requires the EU to do the normalization of GL rectangle
- * texture coordinates. We use the program parameter state
- * tracking to get the scaling factor.
- */
- if (is_rect &&
- (devinfo->gen < 6 ||
- (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) ||
- key_tex->gl_clamp_mask[1] & (1 << sampler))))) {
- struct gl_program_parameter_list *params = prog->Parameters;
-
-
- /* FINISHME: We're failing to recompile our programs when the sampler is
- * updated. This only matters for the texture rectangle scale
- * parameters (pre-gen6, or gen6+ with GL_CLAMP).
- */
- int tokens[STATE_LENGTH] = {
- STATE_INTERNAL,
- STATE_TEXRECT_SCALE,
- prog->SamplerUnits[sampler],
- 0,
- 0
- };
-
- no16("rectangle scale uniform setup not supported on SIMD16\n");
- if (dispatch_width == 16) {
- return coordinate;
- }
-
- GLuint index = _mesa_add_state_reference(params,
- (gl_state_index *)tokens);
- /* Try to find existing copies of the texrect scale uniforms. */
- for (unsigned i = 0; i < uniforms; i++) {
- if (stage_prog_data->param[i] ==
- &prog->Parameters->ParameterValues[index][0]) {
- scale_x = fs_reg(UNIFORM, i);
- scale_y = fs_reg(UNIFORM, i + 1);
- break;
- }
- }
-
- /* If we didn't already set them up, do so now. */
- if (scale_x.file == BAD_FILE) {
- scale_x = fs_reg(UNIFORM, uniforms);
- scale_y = fs_reg(UNIFORM, uniforms + 1);
-
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][0];
- stage_prog_data->param[uniforms++] =
- &prog->Parameters->ParameterValues[index][1];
- }
- }
-
- /* The 965 requires the EU to do the normalization of GL rectangle
- * texture coordinates. We use the program parameter state
- * tracking to get the scaling factor.
- */
- if (devinfo->gen < 6 && is_rect) {
- fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components));
- fs_reg src = coordinate;
- coordinate = dst;
-
- bld.MUL(dst, src, scale_x);
- dst = offset(dst, bld, 1);
- src = offset(src, bld, 1);
- bld.MUL(dst, src, scale_y);
- } else if (is_rect) {
- /* On gen6+, the sampler handles the rectangle coordinates
- * natively, without needing rescaling. But that means we have
- * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
- * not [0, 1] like the default case below.
- */
- needs_gl_clamp = false;
-
- for (int i = 0; i < 2; i++) {
- if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
- fs_reg chan = coordinate;
- chan = offset(chan, bld, i);
-
- set_condmod(BRW_CONDITIONAL_GE,
- bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
-
- /* Our parameter comes in as 1.0/width or 1.0/height,
- * because that's what people normally want for doing
- * texture rectangle handling. We need width or height
- * for clamping, but we don't care enough to make a new
- * parameter type, so just invert back.
- */
- fs_reg limit = vgrf(glsl_type::float_type);
- bld.MOV(limit, i == 0 ? scale_x : scale_y);
- bld.emit(SHADER_OPCODE_RCP, limit, limit);
-
- set_condmod(BRW_CONDITIONAL_L,
- bld.emit(BRW_OPCODE_SEL, chan, chan, limit));
- }
- }
- }
-
- if (coord_components > 0 && needs_gl_clamp) {
- for (int i = 0; i < MIN2(coord_components, 3); i++) {
- if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
- fs_reg chan = coordinate;
- chan = offset(chan, bld, i);
- set_saturate(true, bld.MOV(chan, chan));
- }
- }
- }
- return coordinate;
-}
-
/* Sample from the MCS surface attached to this multisample texture. */
fs_reg
fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
@@ -203,7 +87,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
const fs_reg dest = vgrf(glsl_type::uvec4_type);
const fs_reg srcs[] = {
coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(),
- texture, texture, fs_reg(), fs_reg(components), fs_reg(0)
+ texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0)
};
fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
@@ -227,7 +111,6 @@ fs_visitor::emit_texture(ir_texture_opcode op,
fs_reg mcs,
int gather_component,
bool is_cube_array,
- bool is_rect,
uint32_t surface,
fs_reg surface_reg,
uint32_t sampler,
@@ -235,38 +118,32 @@ fs_visitor::emit_texture(ir_texture_opcode op,
{
fs_inst *inst = NULL;
- if (op == ir_tg4) {
- /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
- * emitting anything other than setting up the constant result.
- */
- int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
- if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-
- fs_reg res = vgrf(glsl_type::vec4_type);
- this->result = res;
-
- for (int i=0; i<4; i++) {
- bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
- res = offset(res, bld, 1);
- }
- return;
- }
- }
-
if (op == ir_query_levels) {
/* textureQueryLevels() is implemented in terms of TXS so we need to
* pass a valid LOD argument.
*/
assert(lod.file == BAD_FILE);
- lod = fs_reg(0u);
+ lod = brw_imm_ud(0u);
}
- if (coordinate.file != BAD_FILE) {
- /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
- * samplers. This should only be a problem with GL_CLAMP on Gen7.
+ if (op == ir_samples_identical) {
+ fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1));
+
+ /* If mcs is an immediate value, it means there is no MCS. In that case
+ * just return false.
*/
- coordinate = rescale_texcoord(coordinate, coord_components, is_rect,
- sampler);
+ if (mcs.file == BRW_IMMEDIATE_VALUE) {
+ bld.MOV(dst, brw_imm_ud(0u));
+ } else if ((key_tex->msaa_16 & (1 << sampler))) {
+ fs_reg tmp = vgrf(glsl_type::uint_type);
+ bld.OR(tmp, mcs, offset(mcs, bld, 1));
+ bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
+ } else {
+ bld.CMP(dst, mcs, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
+ }
+
+ this->result = dst;
+ return;
}
/* Writemasking doesn't eliminate channels on SIMD8 texture
@@ -276,7 +153,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
const fs_reg srcs[] = {
coordinate, shadow_c, lod, lod2,
sample_index, mcs, surface_reg, sampler_reg, offset_value,
- fs_reg(coord_components), fs_reg(grad_components)
+ brw_imm_d(coord_components), brw_imm_d(grad_components)
};
enum opcode opcode;
@@ -327,8 +204,15 @@ fs_visitor::emit_texture(ir_texture_opcode op,
inst->offset = offset_value.ud;
if (op == ir_tg4) {
- inst->offset |=
- gather_channel(gather_component, surface, sampler) << 16; /* M0.2:16-17 */
+ if (gather_component == 1 &&
+ key_tex->gather_channel_quirk_mask & (1 << surface)) {
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ inst->offset |= 2 << 16;
+ } else {
+ inst->offset |= gather_component << 16;
+ }
if (devinfo->gen == 6)
emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst);
@@ -338,7 +222,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
if (op == ir_txs && is_cube_array) {
fs_reg depth = offset(dst, bld, 2);
fs_reg fixed_depth = vgrf(glsl_type::int_type);
- bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
+ bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6));
fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
int components = inst->regs_written / (inst->exec_size / 8);
@@ -352,7 +236,12 @@ fs_visitor::emit_texture(ir_texture_opcode op,
bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
}
- swizzle_result(op, dest_type->vector_elements, dst, sampler);
+ if (op == ir_query_levels) {
+ /* # levels is in .w */
+ dst = offset(dst, bld, 3);
+ }
+
+ this->result = dst;
}
/**
@@ -369,7 +258,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
for (int i = 0; i < 4; i++) {
fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
/* Convert from UNORM to UINT */
- bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)));
+ bld.MUL(dst_f, dst_f, brw_imm_f((1 << width) - 1));
bld.MOV(dst, dst_f);
if (wa & WA_SIGN) {
@@ -377,83 +266,14 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
* shifting the sign bit into place, then shifting back
* preserving sign.
*/
- bld.SHL(dst, dst, fs_reg(32 - width));
- bld.ASR(dst, dst, fs_reg(32 - width));
+ bld.SHL(dst, dst, brw_imm_d(32 - width));
+ bld.ASR(dst, dst, brw_imm_d(32 - width));
}
dst = offset(dst, bld, 1);
}
}
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-fs_visitor::gather_channel(int orig_chan, uint32_t surface, uint32_t sampler)
-{
- int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan);
- switch (swiz) {
- case SWIZZLE_X: return 0;
- case SWIZZLE_Y:
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- if (key_tex->gather_channel_quirk_mask & (1 << surface))
- return 2;
- return 1;
- case SWIZZLE_Z: return 2;
- case SWIZZLE_W: return 3;
- default:
- unreachable("Not reached"); /* zero, one swizzles handled already */
- }
-}
-
-/**
- * Swizzle the result of a texture result. This is necessary for
- * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
- */
-void
-fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
- fs_reg orig_val, uint32_t sampler)
-{
- if (op == ir_query_levels) {
- /* # levels is in .w */
- this->result = offset(orig_val, bld, 3);
- return;
- }
-
- this->result = orig_val;
-
- /* txs,lod don't actually sample the texture, so swizzling the result
- * makes no sense.
- */
- if (op == ir_txs || op == ir_lod || op == ir_tg4)
- return;
-
- if (dest_components == 1) {
- /* Ignore DEPTH_TEXTURE_MODE swizzling. */
- } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) {
- fs_reg swizzled_result = vgrf(glsl_type::vec4_type);
- swizzled_result.type = orig_val.type;
-
- for (int i = 0; i < 4; i++) {
- int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
- fs_reg l = swizzled_result;
- l = offset(l, bld, i);
-
- if (swiz == SWIZZLE_ZERO) {
- bld.MOV(l, fs_reg(0.0f));
- } else if (swiz == SWIZZLE_ONE) {
- bld.MOV(l, fs_reg(1.0f));
- } else {
- bld.MOV(l, offset(orig_val, bld,
- GET_SWZ(key_tex->swizzles[sampler], i)));
- }
- }
- this->result = swizzled_result;
- }
-}
-
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
@@ -464,7 +284,7 @@ fs_visitor::emit_dummy_fs()
const float color[4] = { 1.0, 0.0, 1.0, 0.0 };
for (int i = 0; i < 4; i++) {
bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F),
- fs_reg(color[i]));
+ brw_imm_f(color[i]));
}
fs_inst *write;
@@ -683,7 +503,7 @@ fs_visitor::emit_alpha_test()
fs_reg color = offset(outputs[0], bld, 3);
/* f0.1 &= func(color, ref) */
- cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
+ cmp = abld.CMP(bld.null_reg_f(), color, brw_imm_f(key->alpha_test_ref),
cond_for_alpha_func(key->alpha_test_func));
}
cmp->predicate = BRW_PREDICATE_NORMAL;
@@ -716,7 +536,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg sources[] = {
color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
- sample_mask, fs_reg(components)
+ sample_mask, brw_imm_ud(components)
};
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
@@ -950,12 +770,12 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
fs_reg offset;
if (gs_vertex_count.file == IMM) {
- per_slot_offsets = fs_reg(output_vertex_size_owords *
- gs_vertex_count.ud);
+ per_slot_offsets = brw_imm_ud(output_vertex_size_owords *
+ gs_vertex_count.ud);
} else {
per_slot_offsets = vgrf(glsl_type::int_type);
bld.MUL(per_slot_offsets, gs_vertex_count,
- fs_reg(output_vertex_size_owords));
+ brw_imm_ud(output_vertex_size_owords));
}
}
@@ -978,7 +798,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
}
fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- bld.MOV(zero, fs_reg(0u));
+ bld.MOV(zero, brw_imm_ud(0u));
sources[length++] = zero;
if (vue_map->slots_valid & VARYING_BIT_LAYER)
@@ -1038,7 +858,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
for (unsigned i = 0; i < output_components[varying]; i++)
sources[length++] = offset(this->outputs[varying], bld, i);
for (unsigned i = output_components[varying]; i < 4; i++)
- sources[length++] = fs_reg(0);
+ sources[length++] = brw_imm_d(0);
}
break;
}
@@ -1115,11 +935,11 @@ fs_visitor::emit_barrier()
const fs_builder pbld = bld.exec_all().group(8, 0);
/* Clear the message payload */
- pbld.MOV(payload, fs_reg(0u));
+ pbld.MOV(payload, brw_imm_ud(0u));
/* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
- pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+ pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u));
/* Emit a gateway "barrier" message using the payload we set up, followed
* by a wait instruction.
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index ed0890f430f..149b43ba055 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -75,7 +75,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
* every uniform is a float which gets padded to the size of a vec4.
*/
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- int param_count = gp->program.Base.nir->num_uniforms * 4;
+ int param_count = gp->program.Base.nir->num_uniforms;
+ if (!compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+ param_count *= 4;
prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
@@ -87,7 +89,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
prog_data.base.base.nr_image_params = gs->NumImages;
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
- &prog_data.base.base, compiler->scalar_gs);
+ &prog_data.base.base,
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index 4ed95c473cd..cd9f6ef591d 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -694,7 +694,7 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low)
high %= 64;
low %= 64;
- const uint64_t mask = (1ull << (high - low + 1)) - 1;
+ const uint64_t mask = (~0ull >> (64 - (high - low + 1)));
return (inst->data[word] >> low) & mask;
}
@@ -713,7 +713,7 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value)
high %= 64;
low %= 64;
- const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
+ const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low;
/* Make sure the supplied value actually fits in the given bitfield. */
assert((value & (mask >> low)) == value);
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 7e977e9e727..0410053ce27 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -36,11 +36,6 @@ public:
void init();
fs_reg();
- explicit fs_reg(float f);
- explicit fs_reg(int32_t i);
- explicit fs_reg(uint32_t u);
- explicit fs_reg(uint8_t vf[4]);
- explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
fs_reg(struct brw_reg reg);
fs_reg(enum brw_reg_file file, int nr);
fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 110e64b979e..e2e66044d3a 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -41,11 +41,6 @@ public:
src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
src_reg();
- src_reg(float f);
- src_reg(uint32_t u);
- src_reg(int32_t i);
- src_reg(uint8_t vf[4]);
- src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
src_reg(struct brw_reg reg);
bool equals(const src_reg &r) const;
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index 29911732761..14421d421b6 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw,
gl_shader_stage shader_type,
exec_list *ir)
{
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+
int ops = LOWER_PACK_SNORM_2x16
| LOWER_UNPACK_SNORM_2x16
| LOWER_PACK_UNORM_2x16
| LOWER_UNPACK_UNORM_2x16;
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+ if (compiler->scalar_stage[shader_type]) {
ops |= LOWER_UNPACK_UNORM_4x8
| LOWER_UNPACK_SNORM_4x8
| LOWER_PACK_UNORM_4x8
@@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
* lowering is needed. For SOA code, the Half2x16 ops must be
* scalarized.
*/
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+ if (compiler->scalar_stage[shader_type]) {
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
}
@@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage,
struct gl_shader *shader)
{
struct gl_context *ctx = &brw->ctx;
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[shader->Stage];
@@ -161,7 +164,7 @@ process_glsl_ir(gl_shader_stage stage,
do {
progress = false;
- if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
+ if (compiler->scalar_stage[shader->Stage]) {
brw_do_channel_expressions(shader->ir);
brw_do_vector_splitting(shader->ir);
}
@@ -252,7 +255,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
brw_add_texrect_params(prog);
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
- is_scalar_shader_stage(compiler, stage));
+ compiler->scalar_stage[stage]);
_mesa_reference_program(ctx, &prog, NULL);
}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 12e7c32e424..1f8bfdfa492 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
}
static void
-get_fast_clear_rect(struct gl_framebuffer *fb,
+get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
struct intel_renderbuffer *irb, struct rect *rect)
{
unsigned int x_align, y_align;
@@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
*/
intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
x_align *= 16;
- y_align *= 32;
+
+ /* SKL+ line alignment requirement for Y-tiled are half those of the prior
+ * generations.
+ */
+ if (brw->gen >= 9)
+ y_align *= 16;
+ else
+ y_align *= 32;
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
@@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
* terms of (width,height) of the RT.
*
* MSAA Width of Clear Rect Height of Clear Rect
+ * 2X Ceil(1/8*width) Ceil(1/2*height)
* 4X Ceil(1/8*width) Ceil(1/2*height)
* 8X Ceil(1/2*width) Ceil(1/2*height)
+ * 16X width Ceil(1/2*height)
*
* The text "with upper left co-ordinate to coincide with actual
* rectangle being cleared" is a little confusing--it seems to imply
@@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
case 8:
x_scaledown = 2;
break;
+ case 16:
+ x_scaledown = 1;
+ break;
default:
unreachable("Unexpected sample count for fast clear");
}
@@ -347,8 +359,12 @@ is_color_fast_clear_compatible(struct brw_context *brw,
}
for (int i = 0; i < 4; i++) {
- if (color->f[i] != 0.0f && color->f[i] != 1.0f &&
- _mesa_format_has_color_component(format, i)) {
+ if (!_mesa_format_has_color_component(format, i)) {
+ continue;
+ }
+
+ if (brw->gen < 9 &&
+ color->f[i] != 0.0f && color->f[i] != 1.0f) {
return false;
}
}
@@ -357,18 +373,55 @@ is_color_fast_clear_compatible(struct brw_context *brw,
/**
* Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE.
+ * SURFACE_STATE (DWORD 12-15 on SKL+).
*/
-static uint32_t
-compute_fast_clear_color_bits(const union gl_color_union *color)
+static void
+set_fast_clear_color(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ const union gl_color_union *color)
{
- uint32_t bits = 0;
- for (int i = 0; i < 4; i++) {
- /* Testing for non-0 works for integer and float colors */
- if (color->f[i] != 0.0f)
- bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+ union gl_color_union override_color = *color;
+
+ /* The sampler doesn't look at the format of the surface when the fast
+ * clear color is used so we need to implement luminance, intensity and
+ * missing components manually.
+ */
+ switch (_mesa_get_format_base_format(mt->format)) {
+ case GL_INTENSITY:
+ override_color.ui[3] = override_color.ui[0];
+ /* flow through */
+ case GL_LUMINANCE:
+ case GL_LUMINANCE_ALPHA:
+ override_color.ui[1] = override_color.ui[0];
+ override_color.ui[2] = override_color.ui[0];
+ break;
+ default:
+ for (int i = 0; i < 3; i++) {
+ if (!_mesa_format_has_color_component(mt->format, i))
+ override_color.ui[i] = 0;
+ }
+ break;
+ }
+
+ if (!_mesa_format_has_color_component(mt->format, 3)) {
+ if (_mesa_is_format_integer_color(mt->format))
+ override_color.ui[3] = 1;
+ else
+ override_color.f[3] = 1.0f;
+ }
+
+ if (brw->gen >= 9) {
+ mt->gen9_fast_clear_color = override_color;
+ } else {
+ mt->fast_clear_color_value = 0;
+ for (int i = 0; i < 4; i++) {
+ /* Testing for non-0 works for integer and float colors */
+ if (override_color.f[i] != 0.0f) {
+ mt->fast_clear_color_value |=
+ 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+ }
+ }
}
- return bits;
}
static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
@@ -408,6 +461,55 @@ use_rectlist(struct brw_context *brw, bool enable)
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
}
+/**
+ * Individually fast clear each color buffer attachment. On previous gens this
+ * isn't required. The motivation for this comes from one line (which seems to
+ * be specific to SKL+). The list item is in section titled _MCS Buffer for
+ * Render Target(s)_
+ *
+ * "Since only one RT is bound with a clear pass, only one RT can be cleared
+ * at a time. To clear multiple RTs, multiple clear passes are required."
+ *
+ * The code follows the same idea as the resolve code which creates a fake FBO
+ * to avoid interfering with too much of the GL state.
+ */
+static void
+fast_clear_attachments(struct brw_context *brw,
+ struct gl_framebuffer *fb,
+ uint32_t fast_clear_buffers,
+ struct rect fast_clear_rect)
+{
+ assert(brw->gen >= 9);
+ struct gl_context *ctx = &brw->ctx;
+
+ brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
+
+ /* SKL+ also has a resolve mode for compressed render targets and thus more
+ * bits to let us select the type of resolve. For fast clear resolves, it
+ * turns out we can use the same value as pre-SKL though.
+ */
+ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
+
+ while (fast_clear_buffers) {
+ int index = ffs(fast_clear_buffers) - 1;
+
+ fast_clear_buffers &= ~(1 << index);
+
+ _mesa_meta_drawbuffers_from_bitfield(1 << index);
+
+ brw_draw_rectlist(ctx, &fast_clear_rect, MAX2(1, fb->MaxNumLayers));
+
+ /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
+ * resolve them eventually.
+ */
+ struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+ }
+
+ set_fast_clear_op(brw, 0);
+}
+
bool
brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
GLbitfield buffers, bool partial_clear)
@@ -447,13 +549,15 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
if (brw->gen < 7)
clear_type = REP_CLEAR;
- if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
+ /* Certain formats have unresolved issues with sampling from the MCS
+ * buffer on Gen9. This disables fast clears altogether for MSRTs until
+ * we can figure out what's going on.
+ */
+ if (brw->gen >= 9 && irb->mt->num_samples > 1)
clear_type = REP_CLEAR;
- if (brw->gen >= 9 && clear_type == FAST_CLEAR) {
- perf_debug("fast MCS clears are disabled on gen9");
+ if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
clear_type = REP_CLEAR;
- }
/* We can't do scissored fast clears because of the restrictions on the
* fast clear rectangle size.
@@ -503,8 +607,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
switch (clear_type) {
case FAST_CLEAR:
- irb->mt->fast_clear_color_value =
- compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+ set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
irb->need_downsample = true;
/* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
@@ -520,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
irb->need_downsample = true;
fast_clear_buffers |= 1 << index;
- get_fast_clear_rect(fb, irb, &fast_clear_rect);
+ get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
break;
case REP_CLEAR:
@@ -584,12 +687,27 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
use_rectlist(brw, true);
layers = MAX2(1, fb->MaxNumLayers);
- if (fast_clear_buffers) {
+
+ if (brw->gen >= 9 && fast_clear_buffers) {
+ fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect);
+ } else if (fast_clear_buffers) {
_mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers);
brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
brw_draw_rectlist(ctx, &fast_clear_rect, layers);
set_fast_clear_op(brw, 0);
+
+ /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
+ * resolve them eventually.
+ */
+ for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
+ struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ int index = fb->_ColorDrawBufferIndexes[buf];
+
+ if ((1 << index) & fast_clear_buffers)
+ irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+ }
}
if (rep_clear_buffers) {
@@ -598,18 +716,6 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
brw_draw_rectlist(ctx, &clear_rect, layers);
}
- /* Now set the mts we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
- * resolve them eventually.
- */
- for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
- struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- int index = fb->_ColorDrawBufferIndexes[buf];
-
- if ((1 << index) & fast_clear_buffers)
- irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
- }
-
bail_to_meta:
/* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear
* color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if
@@ -655,8 +761,9 @@ get_resolve_rect(struct brw_context *brw,
*
* The scaledown factors in the table that follows are related to the
* alignment size returned by intel_get_non_msrt_mcs_alignment() by a
- * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
- * by 8 and 16 and 8 and 8 for SKL.
+ * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
+ * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
+ * by a factor of 2.
*/
intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
@@ -702,6 +809,10 @@ brw_meta_resolve_color(struct brw_context *brw,
brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
+ /* SKL+ also has a resolve mode for compressed render targets and thus more
+ * bits to let us select the type of resolve. For fast clear resolves, it
+ * turns out we can use the same value as pre-SKL though.
+ */
set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 58754adc887..91358d8f389 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -56,8 +56,9 @@ remap_vs_attrs(nir_block *block, void *closure)
}
static void
-brw_nir_lower_inputs(const struct brw_device_info *devinfo,
- nir_shader *nir, bool is_scalar)
+brw_nir_lower_inputs(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ bool is_scalar)
{
switch (nir->stage) {
case MESA_SHADER_VERTEX:
@@ -170,131 +171,159 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
}
}
-static void
+static bool
+should_clone_nir()
+{
+ static int should_clone = -1;
+ if (should_clone < 1)
+ should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false);
+
+ return should_clone;
+}
+
+#define _OPT(do_pass) (({ \
+ bool this_progress = true; \
+ do_pass \
+ nir_validate_shader(nir); \
+ if (should_clone_nir()) { \
+ nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+ ralloc_free(nir); \
+ nir = clone; \
+ } \
+ this_progress; \
+}))
+
+#define OPT(pass, ...) _OPT( \
+ nir_metadata_set_validation_flag(nir); \
+ this_progress = pass(nir ,##__VA_ARGS__); \
+ if (this_progress) { \
+ progress = true; \
+ nir_metadata_check_validation_flag(nir); \
+ } \
+)
+
+#define OPT_V(pass, ...) _OPT( \
+ pass(nir, ##__VA_ARGS__); \
+)
+
+static nir_shader *
nir_optimize(nir_shader *nir, bool is_scalar)
{
bool progress;
do {
progress = false;
- nir_lower_vars_to_ssa(nir);
- nir_validate_shader(nir);
+ OPT_V(nir_lower_vars_to_ssa);
if (is_scalar) {
- nir_lower_alu_to_scalar(nir);
- nir_validate_shader(nir);
+ OPT_V(nir_lower_alu_to_scalar);
}
- progress |= nir_copy_prop(nir);
- nir_validate_shader(nir);
+ OPT(nir_copy_prop);
if (is_scalar) {
- nir_lower_phis_to_scalar(nir);
- nir_validate_shader(nir);
+ OPT_V(nir_lower_phis_to_scalar);
}
- progress |= nir_copy_prop(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_dce(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_cse(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_peephole_select(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_algebraic(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_constant_folding(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_dead_cf(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_remove_phis(nir);
- nir_validate_shader(nir);
- progress |= nir_opt_undef(nir);
- nir_validate_shader(nir);
+ OPT(nir_copy_prop);
+ OPT(nir_opt_dce);
+ OPT(nir_opt_cse);
+ OPT(nir_opt_peephole_select);
+ OPT(nir_opt_algebraic);
+ OPT(nir_opt_constant_folding);
+ OPT(nir_opt_dead_cf);
+ OPT(nir_opt_remove_phis);
+ OPT(nir_opt_undef);
} while (progress);
+
+ return nir;
}
+/* Does some simple lowering and runs the standard suite of optimizations
+ *
+ * This is intended to be called more-or-less directly after you get the
+ * shader out of GLSL or some other source. While it is geared towards i965,
+ * it is not at all generator-specific except for the is_scalar flag. Even
+ * there, it is safe to call with is_scalar = false for a shader that is
+ * intended for the FS backend as long as nir_optimize is called again with
+ * is_scalar = true to scalarize everything prior to code gen.
+ */
nir_shader *
-brw_create_nir(struct brw_context *brw,
- const struct gl_shader_program *shader_prog,
- const struct gl_program *prog,
- gl_shader_stage stage,
- bool is_scalar)
+brw_preprocess_nir(nir_shader *nir, bool is_scalar)
{
- struct gl_context *ctx = &brw->ctx;
- const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
- const nir_shader_compiler_options *options =
- ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- nir_shader *nir;
+ bool progress; /* Written by OPT and OPT_V */
+ (void)progress;
- /* First, lower the GLSL IR or Mesa IR to NIR */
- if (shader_prog) {
- nir = glsl_to_nir(shader_prog, stage, options);
- } else {
- nir = prog_to_nir(prog, options);
- nir_convert_to_ssa(nir); /* turn registers into SSA */
- }
- nir_validate_shader(nir);
+ if (nir->stage == MESA_SHADER_GEOMETRY)
+ OPT(nir_lower_gs_intrinsics);
- brw_preprocess_nir(nir, brw->intelScreen->devinfo, is_scalar);
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ };
- if (shader_prog) {
- nir_lower_samplers(nir, shader_prog);
- nir_validate_shader(nir);
+ OPT(nir_lower_tex, &tex_options);
+ OPT(nir_normalize_cubemap_coords);
- nir_lower_atomics(nir, shader_prog);
- nir_validate_shader(nir);
- }
+ OPT(nir_lower_global_vars_to_local);
- brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar);
+ OPT(nir_split_var_copies);
- static GLuint msg_id = 0;
- _mesa_gl_debug(&brw->ctx, &msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s NIR shader:\n",
- _mesa_shader_stage_to_abbrev(nir->stage));
+ nir = nir_optimize(nir, is_scalar);
- return nir;
-}
+ /* Lower a bunch of stuff */
+ OPT_V(nir_lower_var_copies);
-void
-brw_preprocess_nir(nir_shader *nir,
- const struct brw_device_info *devinfo,
- bool is_scalar)
-{
- static const nir_lower_tex_options tex_options = {
- .lower_txp = ~0,
- };
+ /* Get rid of split copies */
+ nir = nir_optimize(nir, is_scalar);
- if (nir->stage == MESA_SHADER_GEOMETRY) {
- nir_lower_gs_intrinsics(nir);
- nir_validate_shader(nir);
- }
+ OPT(nir_remove_dead_variables);
- nir_lower_global_vars_to_local(nir);
- nir_validate_shader(nir);
+ return nir;
+}
- nir_lower_tex(nir, &tex_options);
- nir_validate_shader(nir);
+/* Lowers inputs, outputs, uniforms, and samplers for i965
+ *
+ * This function does all of the standard lowering prior to post-processing.
+ * The lowering done is highly gen, stage, and backend-specific. The
+ * shader_prog parameter is optional and is used only for lowering sampler
+ * derefs and atomics for GLSL shaders.
+ */
+nir_shader *
+brw_lower_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ bool is_scalar)
+{
+ bool progress; /* Written by OPT and OPT_V */
+ (void)progress;
- nir_normalize_cubemap_coords(nir);
- nir_validate_shader(nir);
+ OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
+ OPT_V(brw_nir_lower_outputs, is_scalar);
+ nir_assign_var_locations(&nir->uniforms,
+ &nir->num_uniforms,
+ is_scalar ? type_size_scalar : type_size_vec4);
+ OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
- nir_split_var_copies(nir);
- nir_validate_shader(nir);
+ if (shader_prog) {
+ OPT_V(nir_lower_samplers, shader_prog);
+ }
- nir_optimize(nir, is_scalar);
+ OPT(nir_lower_system_values);
- /* Lower a bunch of stuff */
- nir_lower_var_copies(nir);
- nir_validate_shader(nir);
+ if (shader_prog) {
+ OPT_V(nir_lower_atomics, shader_prog);
+ }
- /* Get rid of split copies */
- nir_optimize(nir, is_scalar);
+ return nir_optimize(nir, is_scalar);
}
-void
+/* Prepare the given shader for codegen
+ *
+ * This function is intended to be called right before going into the actual
+ * backend and is highly backend-specific. Also, once this function has been
+ * called on a shader, it will no longer be in SSA form so most optimizations
+ * will not work.
+ */
+nir_shader *
brw_postprocess_nir(nir_shader *nir,
const struct brw_device_info *devinfo,
bool is_scalar)
@@ -302,40 +331,21 @@ brw_postprocess_nir(nir_shader *nir,
bool debug_enabled =
(INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
- brw_nir_lower_inputs(devinfo, nir, is_scalar);
- brw_nir_lower_outputs(nir, is_scalar);
- nir_assign_var_locations(&nir->uniforms,
- &nir->num_uniforms,
- is_scalar ? type_size_scalar : type_size_vec4);
- nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4);
- nir_validate_shader(nir);
-
- nir_remove_dead_variables(nir);
- nir_validate_shader(nir);
-
- nir_lower_system_values(nir);
- nir_validate_shader(nir);
-
- nir_optimize(nir, is_scalar);
+ bool progress; /* Written by OPT and OPT_V */
+ (void)progress;
if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
- brw_nir_opt_peephole_ffma(nir);
- nir_validate_shader(nir);
+ OPT(brw_nir_opt_peephole_ffma);
}
- nir_opt_algebraic_late(nir);
- nir_validate_shader(nir);
+ OPT(nir_opt_algebraic_late);
- nir_lower_locals_to_regs(nir);
- nir_validate_shader(nir);
+ OPT(nir_lower_locals_to_regs);
- nir_lower_to_source_mods(nir);
- nir_validate_shader(nir);
- nir_copy_prop(nir);
- nir_validate_shader(nir);
- nir_opt_dce(nir);
- nir_validate_shader(nir);
+ OPT_V(nir_lower_to_source_mods);
+ OPT(nir_copy_prop);
+ OPT(nir_opt_dce);
if (unlikely(debug_enabled)) {
/* Re-index SSA defs so we print more sensible numbers. */
@@ -349,15 +359,11 @@ brw_postprocess_nir(nir_shader *nir,
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir, true);
- nir_validate_shader(nir);
+ OPT_V(nir_convert_from_ssa, true);
if (!is_scalar) {
- nir_move_vec_src_uses_to_dest(nir);
- nir_validate_shader(nir);
-
- nir_lower_vec_to_movs(nir);
- nir_validate_shader(nir);
+ OPT_V(nir_move_vec_src_uses_to_dest);
+ OPT(nir_lower_vec_to_movs);
}
/* This is the last pass we run before we start emitting stuff. It
@@ -375,13 +381,83 @@ brw_postprocess_nir(nir_shader *nir,
_mesa_shader_stage_to_string(nir->stage));
nir_print_shader(nir, stderr);
}
+
+ return nir;
+}
+
+nir_shader *
+brw_create_nir(struct brw_context *brw,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ gl_shader_stage stage,
+ bool is_scalar)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+ const nir_shader_compiler_options *options =
+ ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+ bool progress;
+ nir_shader *nir;
+
+ /* First, lower the GLSL IR or Mesa IR to NIR */
+ if (shader_prog) {
+ nir = glsl_to_nir(shader_prog, stage, options);
+ } else {
+ nir = prog_to_nir(prog, options);
+ OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
+ }
+ nir_validate_shader(nir);
+
+ (void)progress;
+
+ nir = brw_preprocess_nir(nir, is_scalar);
+ nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar);
+
+ return nir;
+}
+
+nir_shader *
+brw_nir_apply_sampler_key(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct brw_sampler_prog_key_data *key_tex,
+ bool is_scalar)
+{
+ nir_lower_tex_options tex_options = { 0 };
+
+ /* Iron Lake and prior require lowering of all rectangle textures */
+ if (devinfo->gen < 6)
+ tex_options.lower_rect = true;
+
+ /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
+ if (devinfo->gen < 8) {
+ tex_options.saturate_s = key_tex->gl_clamp_mask[0];
+ tex_options.saturate_t = key_tex->gl_clamp_mask[1];
+ tex_options.saturate_r = key_tex->gl_clamp_mask[2];
+ }
+
+ /* Prior to Haswell, we have to fake texture swizzle */
+ for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
+ if (key_tex->swizzles[s] == SWIZZLE_NOOP)
+ continue;
+
+ tex_options.swizzle_result |= (1 << s);
+ for (unsigned c = 0; c < 4; c++)
+ tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
+ }
+
+ if (nir_lower_tex(nir, &tex_options)) {
+ nir_validate_shader(nir);
+ nir = nir_optimize(nir, is_scalar);
+ }
+
+ return nir;
}
enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
switch (type) {
- case nir_type_unsigned:
+ case nir_type_uint:
return BRW_REGISTER_TYPE_UD;
case nir_type_bool:
case nir_type_int:
@@ -408,7 +484,7 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
case nir_type_int:
return GLSL_TYPE_INT;
- case nir_type_unsigned:
+ case nir_type_uint:
return GLSL_TYPE_UINT;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index d259777e1c9..0a8a5a280b1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -81,19 +81,25 @@ nir_shader *brw_create_nir(struct brw_context *brw,
gl_shader_stage stage,
bool is_scalar);
+nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar);
+nir_shader *brw_lower_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ bool is_scalar);
+nir_shader *brw_postprocess_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ bool is_scalar);
+
+
+nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct brw_sampler_prog_key_data *key,
+ bool is_scalar);
+
enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
-void
-brw_preprocess_nir(nir_shader *nir,
- const struct brw_device_info *devinfo,
- bool is_scalar);
-void
-brw_postprocess_nir(nir_shader *nir,
- const struct brw_device_info *devinfo,
- bool is_scalar);
-
void brw_nir_setup_glsl_uniforms(nir_shader *shader,
struct gl_shader_program *shader_prog,
const struct gl_program *prog,
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6433dec9041..3da8e9e8a97 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -126,6 +126,7 @@ brwProgramStringNotify(struct gl_context *ctx,
struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
switch (target) {
case GL_FRAGMENT_PROGRAM_ARB: {
@@ -165,7 +166,7 @@ brwProgramStringNotify(struct gl_context *ctx,
brw_add_texrect_params(prog);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
- brw->intelScreen->compiler->scalar_vs);
+ compiler->scalar_stage[MESA_SHADER_VERTEX]);
brw_vs_precompile(ctx, NULL, prog);
break;
@@ -343,6 +344,8 @@ brw_report_shader_time(struct brw_context *brw)
switch (type) {
case ST_VS:
+ case ST_TCS:
+ case ST_TES:
case ST_GS:
case ST_FS8:
case ST_FS16:
@@ -369,6 +372,8 @@ brw_report_shader_time(struct brw_context *brw)
switch (type) {
case ST_VS:
+ case ST_TCS:
+ case ST_TES:
case ST_GS:
case ST_FS8:
case ST_FS16:
@@ -406,6 +411,12 @@ brw_report_shader_time(struct brw_context *brw)
case ST_VS:
stage = "vs";
break;
+ case ST_TCS:
+ stage = "tcs";
+ break;
+ case ST_TES:
+ stage = "tes";
+ break;
case ST_GS:
stage = "gs";
break;
@@ -429,6 +440,8 @@ brw_report_shader_time(struct brw_context *brw)
fprintf(stderr, "\n");
print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
+ print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
+ print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 3da83b43b5d..fa912c96c36 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -43,7 +43,6 @@
#define BRW_REG_H
#include <stdbool.h>
-#include "main/imports.h"
#include "main/compiler.h"
#include "main/macros.h"
#include "program/prog_instruction.h"
@@ -619,57 +618,37 @@ static inline struct brw_reg
brw_imm_v(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
- imm.vstride = BRW_VERTICAL_STRIDE_0;
- imm.width = BRW_WIDTH_8;
- imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.ud = v;
return imm;
}
+/** Construct vector of eight unsigned half-byte values */
+static inline struct brw_reg
+brw_imm_uv(unsigned uv)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
+ imm.ud = uv;
+ return imm;
+}
+
/** Construct vector of four 8-bit float values */
static inline struct brw_reg
brw_imm_vf(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
- imm.vstride = BRW_VERTICAL_STRIDE_0;
- imm.width = BRW_WIDTH_4;
- imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.ud = v;
return imm;
}
-/**
- * Convert an integer into a "restricted" 8-bit float, used in vector
- * immediates. The 8-bit floating point format has a sign bit, an
- * excess-3 3-bit exponent, and a 4-bit mantissa. All integer values
- * from -31 to 31 can be represented exactly.
- */
-static inline uint8_t
-int_to_float8(int x)
-{
- if (x == 0) {
- return 0;
- } else if (x < 0) {
- return 1 << 7 | int_to_float8(-x);
- } else {
- const unsigned exponent = _mesa_logbase2(x);
- const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
- assert(exponent <= 4);
- return (exponent + 3) << 4 | mantissa;
- }
-}
-
-/**
- * Construct a floating-point packed vector immediate from its integer
- * values. \sa int_to_float8()
- */
static inline struct brw_reg
-brw_imm_vf4(int v0, int v1, int v2, int v3)
+brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
{
- return brw_imm_vf((int_to_float8(v0) << 0) |
- (int_to_float8(v1) << 8) |
- (int_to_float8(v2) << 16) |
- (int_to_float8(v3) << 24));
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
+ return imm;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1f3ae7ab5e6..2f0e8b680ab 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -72,22 +72,6 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
va_end(args);
}
-bool
-is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
-{
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- return true;
- case MESA_SHADER_GEOMETRY:
- return compiler->scalar_gs;
- case MESA_SHADER_VERTEX:
- return compiler->scalar_vs;
- default:
- return false;
- }
-}
-
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
@@ -100,11 +84,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
brw_fs_alloc_reg_sets(compiler);
brw_vec4_alloc_reg_set(compiler);
- if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
- compiler->scalar_vs = true;
-
- if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false))
- compiler->scalar_gs = true;
+ compiler->scalar_stage[MESA_SHADER_VERTEX] =
+ devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
+ devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false);
+ compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
+ compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
nir_shader_compiler_options *nir_options =
rzalloc(compiler, nir_shader_compiler_options);
@@ -139,7 +124,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
- bool is_scalar = is_scalar_shader_stage(compiler, i);
+ bool is_scalar = compiler->scalar_stage[i];
compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
@@ -154,6 +139,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
}
+ if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+ compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
+
return compiler;
}
@@ -557,6 +545,8 @@ brw_instruction_name(enum opcode op)
return "barrier";
case SHADER_OPCODE_MULH:
return "mulh";
+ case SHADER_OPCODE_MOV_INDIRECT:
+ return "mov_indirect";
}
unreachable("not reached");
@@ -574,16 +564,12 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
switch (type) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_UQ:
case BRW_REGISTER_TYPE_Q:
/* Nothing to do. */
return false;
- case BRW_REGISTER_TYPE_UW:
- sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
- break;
- case BRW_REGISTER_TYPE_W:
- sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
- break;
case BRW_REGISTER_TYPE_F:
sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index c4a37187ce2..9555406c777 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -252,8 +252,6 @@ int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
int type_size_vec4_times_4(const struct glsl_type *type);
-bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
-
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 0d49ab7b431..69eed4bc629 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -32,8 +32,8 @@
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
*/
-#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
- [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf},
+#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \
+ [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, #sf},
#define Y 0
#define x 999
@@ -61,6 +61,7 @@
* VB - Input Vertex Buffer
* SO - Steamed Output Vertex Buffers (transform feedback)
* color - Color Processing
+ * ccs_e - Lossless Compression Support (gen9+ only)
* sf - Surface Format
*
* See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
@@ -71,257 +72,258 @@
* - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
* - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
* - VOL4_Part1 section 3.9.11 Render Target Write.
+ * - Render Target Surface Types [SKL+]
*/
const struct brw_surface_format_info surface_formats[] = {
-/* smpl filt shad CK RT AB VB SO color */
- SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU)
- SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT)
- SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT)
- SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED)
- SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT)
- SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT)
- SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT)
- SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT)
- SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT)
- SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED)
- SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU)
- SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB)
-/* smpl filt shad CK RT AB VB SO color */
- SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB)
- SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT)
- SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM)
- SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB)
- SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT)
- SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R16G16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT)
- SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT)
- SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT)
- SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS)
- SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT)
- SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM)
- SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT)
- SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT)
- SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP)
- SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R32_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R32_SNORM)
-/* smpl filt shad CK RT AB VB SO color */
- SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R32_USCALED)
- SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB)
- SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM)
- SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB)
- SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM)
- SF( Y, Y, x, Y, Y, 60, Y, x, x, R8G8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT)
- SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT)
- SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT)
- SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1)
- SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM)
- SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT)
- SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT)
- SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB)
- SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM)
- SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB)
- SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED)
-/* smpl filt shad CK RT AB VB SO color */
- SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R16_USCALED)
- SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
- SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
- SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM)
- SF( x, x, x, x, x, x, x, x, x, L8A8_UINT)
- SF( x, x, x, x, x, x, x, x, x, L8A8_SINT)
- SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM)
- SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM)
- SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT)
- SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT)
- SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R8_USCALED)
- SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB)
- SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1)
- SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1)
- SF( x, x, x, x, x, x, x, x, x, Y8_SNORM)
- SF( x, x, x, x, x, x, x, x, x, L8_UINT)
- SF( x, x, x, x, x, x, x, x, x, L8_SINT)
- SF( x, x, x, x, x, x, x, x, x, I8_UINT)
- SF( x, x, x, x, x, x, x, x, x, I8_SINT)
- SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, R1_UINT)
- SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL)
- SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY)
- SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0)
- SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1)
- SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM)
- SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB)
- SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB)
- SF( Y, x, x, x, x, x, x, x, x, MONO8)
- SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV)
- SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY)
- SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB)
-/* smpl filt shad CK RT AB VB SO color */
- SF( Y, Y, x, x, x, x, x, x, x, FXT1)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED)
- SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT)
- SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM)
- SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM)
- SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED)
- SF( x, x, x, x, x, x, Y, x, x, R16G16B16_USCALED)
- SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16)
- SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM)
- SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB)
- SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16)
- SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8)
- SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
- SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8)
- SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8)
- SF( x, x, x, x, x, x, x, x, x, EAC_R11)
- SF( x, x, x, x, x, x, x, x, x, EAC_RG11)
- SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
- SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
- SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8)
- SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT)
- SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT)
- SF( x, x, x, x, x, x, x, x, x, R32_SFIXED)
- SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM)
- SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED)
- SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED)
- SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT)
- SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM)
- SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED)
- SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED)
- SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT)
- SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT)
- SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU)
- SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU)
- SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
- SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
- SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
- SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB)
- SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB)
+/* smpl filt shad CK RT AB VB SO color ccs_e */
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU)
+ SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT)
+ SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_SINT)
+ SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED)
+ SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT)
+ SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT)
+ SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT)
+ SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, R32G32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU)
+ SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB)
+/* smpl filt shad CK RT AB VB SO color ccs_e */
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, 60, x, R10G10B10A2_UNORM_SRGB)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT)
+ SF( Y, Y, x, x, x, Y, Y, x, x, x, R10G10B10_SNORM_A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT)
+ SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT)
+ SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT)
+ SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS)
+ SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT)
+ SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT)
+ SF( Y, Y, x, Y, x, x, x, x, 60, 90, B8G8R8X8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP)
+ SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM)
+/* smpl filt shad CK RT AB VB SO color ccs_e */
+ SF( x, x, x, x, x, x, Y, x, x, x, R10G10B10X2_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM)
+ SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, x, R8G8_UNORM)
+ SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT)
+ SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT)
+ SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT)
+ SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0)
+ SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT)
+ SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT)
+ SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM)
+ SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED)
+/* smpl filt shad CK RT AB VB SO color ccs_e */
+ SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16_USCALED)
+ SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
+ SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
+ SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM)
+ SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT)
+ SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM)
+ SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT)
+ SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT)
+ SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED)
+ SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0)
+ SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB)
+ SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1)
+ SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1)
+ SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1)
+ SF( x, x, x, x, x, x, x, x, x, x, Y8_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, x, L8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, L8_SINT)
+ SF( x, x, x, x, x, x, x, x, x, x, I8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, I8_SINT)
+ SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, R1_UINT)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL)
+ SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY)
+ SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0)
+ SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM)
+ SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB)
+ SF( Y, x, x, x, x, x, x, x, x, x, MONO8)
+ SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV)
+ SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPY)
+ SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB)
+/* smpl filt shad CK RT AB VB SO color ccs_e */
+ SF( Y, Y, x, x, x, x, x, x, x, x, FXT1)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM)
+ SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM)
+ SF(50, 50, x, x, x, x, 60, x, x, x, R16G16B16_FLOAT)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_UNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SNORM)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED)
+ SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED)
+ SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16)
+ SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM)
+ SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB)
+ SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16)
+ SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8)
+ SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8)
+ SF( x, x, x, x, x, x, x, x, x, x, EAC_R11)
+ SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11)
+ SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
+ SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8)
+ SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT)
+ SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED)
+ SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT)
+ SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM)
+ SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED)
+ SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT)
+ SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU)
+ SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
+ SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
+ SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
+ SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB)
+ SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB)
};
#undef x
#undef Y
@@ -771,6 +773,26 @@ brw_render_target_supported(struct brw_context *brw,
return brw->format_supported_as_render_target[format];
}
+/*
+ * True if the underlying hardware format can support lossless color
+ * compression.
+ */
+bool
+brw_losslessly_compressible_format(struct brw_context *brw,
+ uint32_t brw_format)
+{
+ const struct brw_surface_format_info * const sinfo =
+ &surface_formats[brw_format];
+ const int gen = brw->gen * 10;
+
+ assert(brw->gen >= 9);
+
+ if (gen >= sinfo->lossless_compression)
+ return true;
+
+ return false;
+}
+
GLuint
translate_tex_format(struct brw_context *brw,
mesa_format mesa_format,
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h
index 5c7b60e680b..a5cd49f5260 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.h
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h
@@ -34,6 +34,7 @@ struct brw_surface_format_info {
int input_vb;
int streamed_output_vb;
int color_processing;
+ int lossless_compression;
const char *name;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a086b43e11a..ae3cf728443 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -71,51 +71,6 @@ src_reg::src_reg()
init();
}
-src_reg::src_reg(float f)
-{
- init();
-
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_F;
- this->f = f;
-}
-
-src_reg::src_reg(uint32_t u)
-{
- init();
-
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_UD;
- this->ud = u;
-}
-
-src_reg::src_reg(int32_t i)
-{
- init();
-
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_D;
- this->d = i;
-}
-
-src_reg::src_reg(uint8_t vf[4])
-{
- init();
-
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->ud, vf, sizeof(unsigned));
-}
-
-src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
-{
- init();
-
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
-}
-
src_reg::src_reg(struct brw_reg reg) :
backend_reg(reg)
{
@@ -382,7 +337,9 @@ vec4_visitor::opt_vector_float()
remaining_channels &= ~inst->dst.writemask;
if (remaining_channels == 0) {
- vec4_instruction *mov = MOV(inst->dst, imm);
+ unsigned vf;
+ memcpy(&vf, imm, sizeof(vf));
+ vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf));
mov->dst.type = BRW_REGISTER_TYPE_F;
mov->dst.writemask = WRITEMASK_XYZW;
inst->insert_after(block, mov);
@@ -657,13 +614,13 @@ vec4_visitor::opt_algebraic()
inst->opcode = BRW_OPCODE_MOV;
switch (inst->src[0].type) {
case BRW_REGISTER_TYPE_F:
- inst->src[0] = src_reg(0.0f);
+ inst->src[0] = brw_imm_f(0.0f);
break;
case BRW_REGISTER_TYPE_D:
- inst->src[0] = src_reg(0);
+ inst->src[0] = brw_imm_d(0);
break;
case BRW_REGISTER_TYPE_UD:
- inst->src[0] = src_reg(0u);
+ inst->src[0] = brw_imm_ud(0u);
break;
default:
unreachable("not reached");
@@ -1232,7 +1189,7 @@ vec4_visitor::eliminate_find_live_channel()
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = src_reg(0);
+ inst->src[0] = brw_imm_d(0);
inst->force_writemask_all = true;
progress = true;
}
@@ -1701,7 +1658,7 @@ vec4_visitor::emit_shader_time_end()
*/
src_reg reset_end = shader_end_time;
reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
- vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u)));
+ vec4_instruction *test = emit(AND(dst_null_ud(), reset_end, brw_imm_ud(1u)));
test->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
@@ -1715,12 +1672,12 @@ vec4_visitor::emit_shader_time_end()
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- emit(ADD(diff, src_reg(diff), src_reg(-2u)));
+ emit(ADD(diff, src_reg(diff), brw_imm_ud(-2u)));
emit_shader_time_write(0, src_reg(diff));
- emit_shader_time_write(1, src_reg(1u));
+ emit_shader_time_write(1, brw_imm_ud(1u));
emit(BRW_OPCODE_ELSE);
- emit_shader_time_write(2, src_reg(1u));
+ emit_shader_time_write(2, brw_imm_ud(1u));
emit(BRW_OPCODE_ENDIF);
}
@@ -1736,7 +1693,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
offset.type = BRW_REGISTER_TYPE_UD;
int index = shader_time_index * 3 + shader_time_subindex;
- emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
+ emit(MOV(offset, brw_imm_d(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
emit(MOV(time, value));
@@ -1762,11 +1719,6 @@ vec4_visitor::convert_to_hw_regs()
reg.negate = src.negate;
break;
- case IMM:
- reg = brw_imm_reg(src.type);
- reg.ud = src.ud;
- break;
-
case UNIFORM:
reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
(src.nr + src.reg_offset) / 2,
@@ -1783,6 +1735,7 @@ vec4_visitor::convert_to_hw_regs()
case ARF:
case FIXED_GRF:
+ case IMM:
continue;
case BAD_FILE:
@@ -1978,13 +1931,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
gl_clip_plane *clip_planes,
bool use_legacy_snorm_formula,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str)
{
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ compiler->scalar_stage[MESA_SHADER_VERTEX]);
+ shader = brw_postprocess_nir(shader, compiler->devinfo,
+ compiler->scalar_stage[MESA_SHADER_VERTEX]);
+
const unsigned *assembly = NULL;
unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
@@ -2002,7 +1961,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
* Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
* vec4 mode, the hardware appears to wedge unless we read something.
*/
- if (compiler->scalar_vs)
+ if (compiler->scalar_stage[MESA_SHADER_VERTEX])
prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
else
prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2);
@@ -2021,7 +1980,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
else
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
- if (compiler->scalar_vs) {
+ if (compiler->scalar_stage[MESA_SHADER_VERTEX]) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 52d68c5a33d..f94f7128a07 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -276,14 +276,9 @@ public:
uint32_t surface, src_reg surface_reg,
uint32_t sampler, src_reg sampler_reg);
- uint32_t gather_channel(unsigned gather_component,
- uint32_t surface, uint32_t sampler);
src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
src_reg sampler);
void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
- void swizzle_result(ir_texture_opcode op, dst_reg dest,
- src_reg orig_val, uint32_t sampler,
- const glsl_type *dest_type);
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index a76a4ce4639..be1427c7db7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -484,7 +484,7 @@ namespace brw {
const dst_reg x_times_one_minus_a = vgrf(dst.type);
MUL(y_times_a, y, a);
- ADD(one_minus_a, negate(a), src_reg(1.0f));
+ ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 1a09f76a20c..b13d36e2c7d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -30,6 +30,7 @@
#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
#include "brw_fs.h"
+#include "brw_nir.h"
namespace brw {
@@ -153,7 +154,7 @@ vec4_gs_visitor::emit_prolog()
*/
this->current_annotation = "clear r0.2";
dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
+ vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
inst->force_writemask_all = true;
/* Create a virtual register to hold the vertex count */
@@ -161,7 +162,7 @@ vec4_gs_visitor::emit_prolog()
/* Initialize the vertex_count register to 0 */
this->current_annotation = "initialize vertex_count";
- inst = emit(MOV(dst_reg(this->vertex_count), 0u));
+ inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
inst->force_writemask_all = true;
if (c->control_data_header_size_bits > 0) {
@@ -176,7 +177,7 @@ vec4_gs_visitor::emit_prolog()
*/
if (c->control_data_header_size_bits <= 32) {
this->current_annotation = "initialize control data bits";
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
}
@@ -274,7 +275,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
- (uint32_t) gs_prog_data->output_vertex_size_hwords);
+ brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
}
@@ -354,11 +355,12 @@ vec4_gs_visitor::emit_control_data_bits()
src_reg dword_index(this, glsl_type::uint_type);
if (urb_write_flags) {
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ emit(ADD(dst_reg(prev_count), this->vertex_count,
+ brw_imm_ud(0xffffffffu)));
unsigned log2_bits_per_vertex =
_mesa_fls(c->control_data_bits_per_vertex);
emit(SHR(dst_reg(dword_index), prev_count,
- (uint32_t) (6 - log2_bits_per_vertex)));
+ brw_imm_ud(6 - log2_bits_per_vertex)));
}
/* Start building the URB write message. The first MRF gets a copy of
@@ -375,8 +377,9 @@ vec4_gs_visitor::emit_control_data_bits()
* the appropriate OWORD within the control data header.
*/
src_reg per_slot_offset(this, glsl_type::uint_type);
- emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
- emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+ emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
+ emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
+ brw_imm_ud(1u));
}
if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
@@ -388,10 +391,10 @@ vec4_gs_visitor::emit_control_data_bits()
* together.
*/
src_reg channel(this, glsl_type::uint_type);
- inst = emit(AND(dst_reg(channel), dword_index, 3u));
+ inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
inst->force_writemask_all = true;
src_reg one(this, glsl_type::uint_type);
- inst = emit(MOV(dst_reg(one), 1u));
+ inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
inst->force_writemask_all = true;
src_reg channel_mask(this, glsl_type::uint_type);
inst = emit(SHL(dst_reg(channel_mask), one, channel));
@@ -441,11 +444,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
/* reg::sid = stream_id */
src_reg sid(this, glsl_type::uint_type);
- emit(MOV(dst_reg(sid), stream_id));
+ emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
/* reg:shift_count = 2 * (vertex_count - 1) */
src_reg shift_count(this, glsl_type::uint_type);
- emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));
+ emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
@@ -503,8 +506,8 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
* vertex_count & (32 / bits_per_vertex - 1) == 0
*/
vec4_instruction *inst =
- emit(AND(dst_null_d(), this->vertex_count,
- (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+ emit(AND(dst_null_ud(), this->vertex_count,
+ brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
@@ -512,7 +515,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
/* If vertex_count is 0, then no control data bits have been
* accumulated yet, so we skip emitting them.
*/
- emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ));
emit(IF(BRW_PREDICATE_NORMAL));
emit_control_data_bits();
@@ -525,7 +528,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
emit(BRW_OPCODE_ENDIF);
@@ -586,9 +589,9 @@ vec4_gs_visitor::gs_end_primitive()
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
src_reg one(this, glsl_type::uint_type);
- emit(MOV(dst_reg(one), 1u));
+ emit(MOV(dst_reg(one), brw_imm_ud(1u)));
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
src_reg mask(this, glsl_type::uint_type);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
@@ -604,7 +607,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
struct gl_shader_program *shader_prog,
int shader_time_index,
unsigned *final_assembly_size,
@@ -614,6 +617,12 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
memset(&c, 0, sizeof(c));
c.key = *key;
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
+ shader = brw_postprocess_nir(shader, compiler->devinfo,
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
+
prog_data->include_primitive_id =
(shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
@@ -773,7 +782,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
- return false;
+ return NULL;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
@@ -819,7 +828,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
brw_print_vue_map(stderr, &prog_data->base.vue_map);
}
- if (compiler->scalar_gs) {
+ if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) {
/* TODO: Support instanced GS. We have basically no tests... */
assert(prog_data->invocations == 1);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index bf098b41590..260b515ad42 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -122,7 +122,7 @@ vec4_visitor::nir_setup_inputs()
{
nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
for (unsigned i = 0; i < nir->num_inputs; i++) {
- nir_inputs[i] = dst_reg();
+ nir_inputs[i] = src_reg();
}
nir_foreach_variable(var, &nir->inputs) {
@@ -373,7 +373,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
}
reg.writemask = writemask;
- emit(MOV(reg, src_reg(instr->value.i[i])));
+ emit(MOV(reg, brw_imm_d(instr->value.i[i])));
remaining &= ~writemask;
}
@@ -444,10 +444,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
inst->base_mrf = 2;
inst->mlen = 1; /* always at least one */
- inst->src[1] = src_reg(index);
+ inst->src[1] = brw_imm_ud(index);
/* MRF for the first parameter */
- src_reg lod = src_reg(0);
+ src_reg lod = brw_imm_d(0);
int param_base = inst->base_mrf;
int writemask = WRITEMASK_X;
emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
@@ -471,12 +471,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
if (const_uniform_block) {
unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
- surf_index = src_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
- src_reg(prog_data->base.binding_table.ssbo_start)));
+ brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
brw_mark_surface_used(&prog_data->base,
@@ -491,7 +491,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
} else {
const_offset_bytes = instr->const_index[0];
- emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes)));
}
/* Value */
@@ -566,7 +566,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
if (skipped_channels > 0) {
if (!has_indirect) {
const_offset_bytes += 4 * skipped_channels;
- offset_reg = src_reg(const_offset_bytes);
+ offset_reg = brw_imm_ud(const_offset_bytes);
} else {
emit(ADD(dst_reg(offset_reg), offset_reg,
brw_imm_ud(4 * skipped_channels)));
@@ -614,13 +614,13 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
if (const_uniform_block) {
unsigned index = prog_data->base.binding_table.ssbo_start +
const_uniform_block->u[0];
- surf_index = src_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
- src_reg(prog_data->base.binding_table.ssbo_start)));
+ brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
surf_index = emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
@@ -637,7 +637,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
} else {
const_offset_bytes = instr->const_index[0];
- emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes))));
}
/* Read the vector */
@@ -762,7 +762,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
const unsigned index = prog_data->base.binding_table.ubo_start +
const_block_index->u[0];
- surf_index = src_reg(index);
+ surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
/* The block index is not a constant. Evaluate the index expression
@@ -772,7 +772,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
instr->num_components),
- src_reg(prog_data->base.binding_table.ubo_start)));
+ brw_imm_ud(prog_data->base.binding_table.ubo_start)));
surf_index = emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
@@ -787,11 +787,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg offset;
if (!has_indirect) {
- offset = src_reg(const_offset / 16);
+ offset = brw_imm_ud(const_offset / 16);
} else {
offset = src_reg(this, glsl_type::uint_type);
emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
- src_reg(4u)));
+ brw_imm_ud(4u)));
}
src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
@@ -848,12 +848,12 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
if (const_surface) {
unsigned surf_index = prog_data->base.binding_table.ssbo_start +
const_surface->u[0];
- surface = src_reg(surf_index);
+ surface = brw_imm_ud(surf_index);
brw_mark_surface_used(&prog_data->base, surf_index);
} else {
surface = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
- src_reg(prog_data->base.binding_table.ssbo_start)));
+ brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
/* Assume this may touch any UBO. This is the same we do for other
* UBO/SSBO accesses with non-constant surface.
@@ -1174,8 +1174,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
- emit(MOV(dst, src_reg(0)));
- inst = emit(MOV(dst, src_reg(~0)));
+ emit(MOV(dst, brw_imm_d(0)));
+ inst = emit(MOV(dst, brw_imm_d(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
break;
}
@@ -1192,8 +1192,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
- emit(MOV(dst, src_reg(0)));
- inst = emit(MOV(dst, src_reg(~0)));
+ emit(MOV(dst, brw_imm_d(0)));
+ inst = emit(MOV(dst, brw_imm_d(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
}
@@ -1235,11 +1235,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_f2b:
- emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+ emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
break;
case nir_op_i2b:
- emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
break;
case nir_op_fnoise1_1:
@@ -1321,9 +1321,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
src_reg src(dst);
- emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ));
+ emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
- inst = emit(ADD(dst, src, src_reg(31)));
+ inst = emit(ADD(dst, src, brw_imm_d(31)));
inst->predicate = BRW_PREDICATE_NORMAL;
inst->src[0].negate = true;
break;
@@ -1364,13 +1364,13 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
* Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
* zero.
*/
- emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+ emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
op[0].type = BRW_REGISTER_TYPE_UD;
dst.type = BRW_REGISTER_TYPE_UD;
- emit(AND(dst, op[0], src_reg(0x80000000u)));
+ emit(AND(dst, op[0], brw_imm_ud(0x80000000u)));
- inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u)));
+ inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u)));
inst->predicate = BRW_PREDICATE_NORMAL;
dst.type = BRW_REGISTER_TYPE_F;
@@ -1385,9 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
* -> non-negative val generates 0x00000000.
* Predicated OR sets 1 if val is positive.
*/
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
- emit(ASR(dst, op[0], src_reg(31)));
- inst = emit(OR(dst, src_reg(dst), src_reg(1)));
+ emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G));
+ emit(ASR(dst, op[0], brw_imm_d(31)));
+ inst = emit(OR(dst, src_reg(dst), brw_imm_d(1)));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
@@ -1418,7 +1418,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_bcsel:
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
switch (dst.writemask) {
case WRITEMASK_X:
@@ -1465,10 +1465,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
unsigned swiz =
brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
- emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0),
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), brw_imm_d(0),
BRW_CONDITIONAL_NZ));
- emit(MOV(dst, src_reg(0)));
- inst = emit(MOV(dst, src_reg(~0)));
+ emit(MOV(dst, brw_imm_d(0)));
+ inst = emit(MOV(dst, brw_imm_d(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
}
@@ -1502,7 +1502,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
dst_reg masked = dst_reg(this, glsl_type::int_type);
masked.writemask = dst.writemask;
- emit(AND(masked, src_reg(dst), src_reg(1)));
+ emit(AND(masked, src_reg(dst), brw_imm_d(1)));
src_reg masked_neg = src_reg(masked);
masked_neg.negate = true;
emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
@@ -1551,6 +1551,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
case nir_texop_txf_ms: op = ir_txf_ms; break;
case nir_texop_txl: op = ir_txl; break;
case nir_texop_txs: op = ir_txs; break;
+ case nir_texop_samples_identical: op = ir_samples_identical; break;
default:
unreachable("unknown texture opcode");
}
@@ -1566,7 +1567,7 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type,
return glsl_type::vec(components);
case nir_type_int:
return glsl_type::ivec(components);
- case nir_type_unsigned:
+ case nir_type_uint:
return glsl_type::uvec(components);
case nir_type_bool:
return glsl_type::bvec(components);
@@ -1582,8 +1583,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
{
unsigned texture = instr->texture_index;
unsigned sampler = instr->sampler_index;
- src_reg texture_reg = src_reg(texture);
- src_reg sampler_reg = src_reg(sampler);
+ src_reg texture_reg = brw_imm_ud(texture);
+ src_reg sampler_reg = brw_imm_ud(sampler);
src_reg coordinate;
const glsl_type *coord_type = NULL;
src_reg shadow_comparitor;
@@ -1597,17 +1598,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
nir_tex_instr_dest_size(instr));
dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
- /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
- * emitting anything other than setting up the constant result.
- */
- if (instr->op == nir_texop_tg4) {
- int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
- if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
- emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
- return;
- }
- }
-
/* Load the texture operation sources */
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
@@ -1622,6 +1612,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
src_size);
coord_type = glsl_type::ivec(src_size);
@@ -1661,14 +1652,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
case nir_tex_src_ms_index: {
sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
- assert(coord_type != NULL);
- if (devinfo->gen >= 7 &&
- key_tex->compressed_multisample_layout_mask & (1 << texture)) {
- mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg);
- } else {
- mcs = src_reg(0u);
- }
- mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
break;
}
@@ -1693,7 +1676,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
/* Emit code to evaluate the actual indexing expression */
src_reg src = get_nir_src(instr->src[i].src, 1);
src_reg temp(this, glsl_type::uint_type);
- emit(ADD(dst_reg(temp), src, src_reg(texture)));
+ emit(ADD(dst_reg(temp), src, brw_imm_ud(texture)));
texture_reg = emit_uniformize(temp);
break;
}
@@ -1702,7 +1685,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
/* Emit code to evaluate the actual indexing expression */
src_reg src = get_nir_src(instr->src[i].src, 1);
src_reg temp(this, glsl_type::uint_type);
- emit(ADD(dst_reg(temp), src, src_reg(sampler)));
+ emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler)));
sampler_reg = emit_uniformize(temp);
break;
}
@@ -1718,6 +1701,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
}
}
+ if (instr->op == nir_texop_txf_ms ||
+ instr->op == nir_texop_samples_identical) {
+ assert(coord_type != NULL);
+ if (devinfo->gen >= 7 &&
+ key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
+ mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+ } else {
+ mcs = brw_imm_ud(0u);
+ }
+ }
+
uint32_t constant_offset = 0;
for (unsigned i = 0; i < 3; i++) {
if (instr->const_offset[i] != 0) {
@@ -1727,8 +1721,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
}
/* Stuff the channel select bits in the top of the texture offset */
- if (instr->op == nir_texop_tg4)
- constant_offset |= gather_channel(instr->component, texture, sampler) << 16;
+ if (instr->op == nir_texop_tg4) {
+ if (instr->component == 1 &&
+ (key_tex->gather_channel_quirk_mask & (1 << texture))) {
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ constant_offset |= 2 << 16;
+ } else {
+ constant_offset |= instr->component << 16;
+ }
+ }
ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index a7c286d3ac1..28002c56cdc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -71,7 +71,7 @@ namespace {
bld.MOV(writemask(tmp, mask), src);
if (n < 4)
- bld.MOV(writemask(tmp, ~mask), 0);
+ bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
}
@@ -143,7 +143,7 @@ namespace brw {
/* Emit the message send instruction. */
const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
vec4_instruction *inst =
- bld.emit(op, dst, src_reg(payload), usurface, arg);
+ bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
inst->mlen = sz;
inst->regs_written = ret_sz;
inst->header_size = header_sz;
@@ -235,7 +235,7 @@ namespace brw {
const vec4_builder ubld = bld.exec_all();
const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
- ubld.MOV(dst, src_reg(0));
+ ubld.MOV(dst, brw_imm_d(0));
if (bld.shader->devinfo->gen == 7 &&
!bld.shader->devinfo->is_haswell) {
@@ -243,7 +243,7 @@ namespace brw {
* have no SIMD4x2 variant. We only use the two X channels
* in that case, mask everything else out.
*/
- ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11));
+ ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
}
return src_reg(dst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 6b8798da71c..caf1ee02bf0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -408,7 +408,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
* You should inspect the disasm output in order to verify that the MOV is
* not optimized away.
*/
- emit(MOV(tmp_dst, src_reg(0x12345678u)));
+ emit(MOV(tmp_dst, brw_imm_ud(0x12345678u)));
#endif
/* Give tmp the form below, where "." means untouched.
@@ -427,7 +427,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
* 0xhhhh0000
*/
tmp_src.swizzle = BRW_SWIZZLE_YYYY;
- emit(SHL(dst, tmp_src, src_reg(16u)));
+ emit(SHL(dst, tmp_src, brw_imm_ud(16u)));
/* Finally, give the write-channels of dst the form of packHalf2x16's
* output:
@@ -466,10 +466,10 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
src_reg tmp_src(tmp_dst);
tmp_dst.writemask = WRITEMASK_X;
- emit(AND(tmp_dst, src0, src_reg(0xffffu)));
+ emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu)));
tmp_dst.writemask = WRITEMASK_Y;
- emit(SHR(tmp_dst, src0, src_reg(16u)));
+ emit(SHR(tmp_dst, src0, brw_imm_ud(16u)));
dst.writemask = WRITEMASK_XY;
emit(F16TO32(dst, tmp_src));
@@ -484,7 +484,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
* vector float and a type-converting MOV.
*/
dst_reg shift(this, glsl_type::uvec4_type);
- emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
+ emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
dst_reg shifted(this, glsl_type::uvec4_type);
src0.swizzle = BRW_SWIZZLE_XXXX;
@@ -494,7 +494,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
dst_reg f(this, glsl_type::vec4_type);
emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
- emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
+ emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f)));
}
void
@@ -506,7 +506,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
* vector float and a type-converting MOV.
*/
dst_reg shift(this, glsl_type::uvec4_type);
- emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
+ emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
dst_reg shifted(this, glsl_type::uvec4_type);
src0.swizzle = BRW_SWIZZLE_XXXX;
@@ -517,11 +517,11 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
dst_reg scaled(this, glsl_type::vec4_type);
- emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
+ emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f)));
dst_reg max(this, glsl_type::vec4_type);
- emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
- emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
+ emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f));
}
void
@@ -532,7 +532,7 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
inst->saturate = true;
dst_reg scaled(this, glsl_type::vec4_type);
- emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
+ emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f)));
dst_reg rounded(this, glsl_type::vec4_type);
emit(RNDE(rounded, src_reg(scaled)));
@@ -548,13 +548,13 @@ void
vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
{
dst_reg max(this, glsl_type::vec4_type);
- emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f));
dst_reg min(this, glsl_type::vec4_type);
- emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
+ emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f));
dst_reg scaled(this, glsl_type::vec4_type);
- emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
+ emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f)));
dst_reg rounded(this, glsl_type::vec4_type);
emit(RNDE(rounded, src_reg(scaled)));
@@ -716,7 +716,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
x_times_one_minus_a.writemask = dst.writemask;
emit(MUL(y_times_a, y, a));
- emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
+ emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f)));
emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
}
@@ -850,7 +850,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
coordinate));
emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
- src_reg(0)));
+ brw_imm_d(0)));
emit(inst);
return src_reg(inst->dst);
@@ -892,7 +892,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
*/
if (op == ir_tex || op == ir_query_levels) {
assert(lod.file == BAD_FILE);
- lod = src_reg(0.0f);
+ lod = brw_imm_f(0.0f);
}
enum opcode opcode;
@@ -912,12 +912,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
unreachable("TXB is not valid for vertex shaders.");
case ir_lod:
unreachable("LOD is not valid for vertex shaders.");
+ case ir_samples_identical: {
+ /* There are some challenges implementing this for vec4, and it seems
+ * unlikely to be used anyway. For now, just return false ways.
+ */
+ emit(MOV(dest, brw_imm_ud(0u)));
+ return;
+ }
default:
unreachable("Unrecognized tex op");
}
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(
- opcode, dst_reg(this, dest_type));
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
inst->offset = constant_offset;
@@ -963,7 +969,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
if (zero_mask != 0) {
emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
- src_reg(0)));
+ brw_imm_d(0)));
}
/* Load the shadow comparitor */
if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
@@ -1062,15 +1068,20 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
if (op == ir_txs && is_cube_array) {
emit_math(SHADER_OPCODE_INT_QUOTIENT,
writemask(inst->dst, WRITEMASK_Z),
- src_reg(inst->dst), src_reg(6));
+ src_reg(inst->dst), brw_imm_d(6));
}
if (devinfo->gen == 6 && op == ir_tg4) {
emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
}
- swizzle_result(op, dest,
- src_reg(inst->dst), sampler, dest_type);
+ if (op == ir_query_levels) {
+ /* # levels is in .w */
+ src_reg swizzled(dest);
+ swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
+ SWIZZLE_W, SWIZZLE_W);
+ emit(MOV(dest, swizzled));
+ }
}
/**
@@ -1087,7 +1098,7 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
dst_f.type = BRW_REGISTER_TYPE_F;
/* Convert from UNORM to UINT */
- emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
+ emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1))));
emit(MOV(dst, src_reg(dst_f)));
if (wa & WA_SIGN) {
@@ -1095,90 +1106,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
* shifting the sign bit into place, then shifting back
* preserving sign.
*/
- emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
- emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
- }
-}
-
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-vec4_visitor::gather_channel(unsigned gather_component,
- uint32_t surface, uint32_t sampler)
-{
- int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
- switch (swiz) {
- case SWIZZLE_X: return 0;
- case SWIZZLE_Y:
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- if (key_tex->gather_channel_quirk_mask & (1 << surface))
- return 2;
- return 1;
- case SWIZZLE_Z: return 2;
- case SWIZZLE_W: return 3;
- default:
- unreachable("Not reached"); /* zero, one swizzles handled already */
- }
-}
-
-void
-vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
- src_reg orig_val, uint32_t sampler,
- const glsl_type *dest_type)
-{
- int s = key_tex->swizzles[sampler];
-
- dst_reg swizzled_result = dest;
-
- if (op == ir_query_levels) {
- /* # levels is in .w */
- orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
- emit(MOV(swizzled_result, orig_val));
- return;
- }
-
- if (op == ir_txs || dest_type == glsl_type::float_type
- || s == SWIZZLE_NOOP || op == ir_tg4) {
- emit(MOV(swizzled_result, orig_val));
- return;
- }
-
-
- int zero_mask = 0, one_mask = 0, copy_mask = 0;
- int swizzle[4] = {0};
-
- for (int i = 0; i < 4; i++) {
- switch (GET_SWZ(s, i)) {
- case SWIZZLE_ZERO:
- zero_mask |= (1 << i);
- break;
- case SWIZZLE_ONE:
- one_mask |= (1 << i);
- break;
- default:
- copy_mask |= (1 << i);
- swizzle[i] = GET_SWZ(s, i);
- break;
- }
- }
-
- if (copy_mask) {
- orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
- swizzled_result.writemask = copy_mask;
- emit(MOV(swizzled_result, orig_val));
- }
-
- if (zero_mask) {
- swizzled_result.writemask = zero_mask;
- emit(MOV(swizzled_result, src_reg(0.0f)));
- }
-
- if (one_mask) {
- swizzled_result.writemask = one_mask;
- emit(MOV(swizzled_result, src_reg(1.0f)));
+ emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width)));
+ emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width)));
}
}
@@ -1225,7 +1154,7 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
*/
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
src_payload,
- src_reg(surf_index), src_reg(atomic_op));
+ brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
inst->mlen = mlen;
}
@@ -1245,7 +1174,7 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
*/
vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
src_reg(offset),
- src_reg(surf_index), src_reg(1));
+ brw_imm_ud(surf_index), brw_imm_d(1));
inst->mlen = 1;
}
@@ -1286,14 +1215,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
- emit(MOV(header1, 0u));
+ emit(MOV(header1, brw_imm_ud(0u)));
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
current_annotation = "Point size";
- emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
- emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
+ emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
+ emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
}
if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
@@ -1301,13 +1230,13 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
- emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
+ emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+ emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
- emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
- emit(SHL(flags1, src_reg(flags1), src_reg(4)));
+ emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+ emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
+ emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
}
@@ -1324,20 +1253,20 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
- emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
+ emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
vec4_instruction *inst;
- inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
+ inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
inst->predicate = BRW_PREDICATE_NORMAL;
output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
- inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
+ inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
} else if (devinfo->gen < 6) {
- emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
+ emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)));
} else {
- emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
+ emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0)));
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
dst_reg reg_w = reg;
reg_w.writemask = WRITEMASK_W;
@@ -1529,13 +1458,13 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
src_reg index = src_reg(this, glsl_type::int_type);
emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- src_reg(reg_offset)));
+ brw_imm_d(reg_offset)));
emit_before(block, inst, MUL(dst_reg(index), index,
- src_reg(message_header_scale)));
+ brw_imm_d(message_header_scale)));
return index;
} else {
- return src_reg(reg_offset * message_header_scale);
+ return brw_imm_d(reg_offset * message_header_scale);
}
}
@@ -1547,24 +1476,24 @@ vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
src_reg index = src_reg(this, glsl_type::int_type);
emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- src_reg(reg_offset)));
+ brw_imm_d(reg_offset)));
/* Pre-gen6, the message header uses byte offsets instead of vec4
* (16-byte) offset units.
*/
if (devinfo->gen < 6) {
- emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
+ emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16)));
}
return index;
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
src_reg offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
+ emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset)));
return offset;
} else {
int message_header_scale = devinfo->gen < 6 ? 16 : 1;
- return src_reg(reg_offset * message_header_scale);
+ return brw_imm_d(reg_offset * message_header_scale);
}
}
@@ -1753,7 +1682,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
reg_offset);
emit_pull_constant_load_reg(temp,
- src_reg(index),
+ brw_imm_ud(index),
offset,
block, inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 5dd4f98cecc..fd8be7d972c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -50,7 +50,7 @@ vec4_vs_visitor::emit_prolog()
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
- emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
+ emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f)));
}
/* Do sign recovery for 2101010 formats if required. */
@@ -58,8 +58,8 @@ vec4_vs_visitor::emit_prolog()
if (sign_recovery_shift.file == BAD_FILE) {
/* shift constant: <22,22,22,30> */
sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
+ emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u)));
+ emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u)));
}
emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
@@ -87,16 +87,16 @@ vec4_vs_visitor::emit_prolog()
/* mul constant: 1 / (2^(b-1) - 1) */
es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
- src_reg(1.0f / ((1<<9) - 1))));
+ brw_imm_f(1.0f / ((1<<9) - 1))));
emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
- src_reg(1.0f / ((1<<1) - 1))));
+ brw_imm_f(1.0f / ((1<<1) - 1))));
}
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
emit(MOV(dst, src_reg(reg_d)));
emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
- emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f));
} else {
/* The following equations are from the OpenGL 3.2 specification:
*
@@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog()
/* 1 / (2^b - 1) for b=<10,10,10,2> */
normalize_factor = dst_reg(this, glsl_type::vec4_type);
emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
- src_reg(1.0f / ((1<<10) - 1))));
+ brw_imm_f(1.0f / ((1<<10) - 1))));
emit(MOV(writemask(normalize_factor, WRITEMASK_W),
- src_reg(1.0f / ((1<<2) - 1))));
+ brw_imm_f(1.0f / ((1<<2) - 1))));
}
dst_reg dst = reg;
@@ -124,8 +124,8 @@ vec4_vs_visitor::emit_prolog()
/* For signed normalization, we want the numerator to be 2c+1. */
if (wa_flags & BRW_ATTRIB_WA_SIGN) {
- emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
- emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
+ emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f)));
+ emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
}
emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 967448e0e41..7c783f66864 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -48,6 +48,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
struct brw_vertex_program *vp,
struct brw_vs_prog_key *key)
{
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
GLuint program_size;
const GLuint *program;
struct brw_vs_prog_data prog_data;
@@ -79,7 +80,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
* by the state cache.
*/
int param_count = vp->program.Base.nir->num_uniforms;
- if (!brw->intelScreen->compiler->scalar_vs)
+ if (!compiler->scalar_stage[MESA_SHADER_VERTEX])
param_count *= 4;
if (vs)
@@ -102,7 +103,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
if (prog) {
brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
&prog_data.base.base,
- brw->intelScreen->compiler->scalar_vs);
+ compiler->scalar_stage[MESA_SHADER_VERTEX]);
} else {
brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
&prog_data.base.base);
@@ -173,7 +174,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
/* Emit GEN4 code.
*/
char *error_str;
- program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ program = brw_compile_vs(compiler, brw, mem_ctx, key,
&prog_data, vp->program.Base.nir,
brw_select_clip_planes(&brw->ctx),
!_mesa_is_gles3(&brw->ctx),
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 2fef188c17e..3840ce0fe57 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -65,7 +65,7 @@ gen6_gs_visitor::emit_prolog()
(prog_data->vue_map.num_slots + 1) *
nir->info.gs.vertices_out);
this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+ emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
/* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
* so initialize it once to R0.
@@ -87,13 +87,13 @@ gen6_gs_visitor::emit_prolog()
* headers.
*/
this->first_vertex = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+ emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START)));
/* The FF_SYNC message requires to know the number of primitives generated,
* so keep a counter for this.
*/
this->prim_count = src_reg(this, glsl_type::uint_type);
- emit(MOV(dst_reg(this->prim_count), 0u));
+ emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u)));
if (gs_prog_data->gen6_xfb_enabled) {
/* Create a virtual register to hold destination indices in SOL */
@@ -170,7 +170,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
}
emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ this->vertex_output_offset, brw_imm_ud(1u)));
}
/* Now buffer flags for this vertex */
@@ -181,9 +181,9 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
/* If we are outputting points, then every vertex has PrimStart and
* PrimEnd set.
*/
- emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
- URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+ URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)));
+ emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
} else {
/* Otherwise, we can only set the PrimStart flag, which we have stored
* in the first_vertex register. We will have to wait until we execute
@@ -191,11 +191,12 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
* vertex.
*/
emit(OR(dst, this->first_vertex,
- (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
- emit(MOV(dst_reg(this->first_vertex), 0u));
+ brw_imm_ud(gs_prog_data->output_topology <<
+ URB_WRITE_PRIM_TYPE_SHIFT)));
+ emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u)));
}
emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ this->vertex_output_offset, brw_imm_ud(1u)));
}
void
@@ -218,10 +219,10 @@ gen6_gs_visitor::gs_end_primitive()
* below).
*/
unsigned num_output_vertices = nir->info.gs.vertices_out;
- emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
- BRW_CONDITIONAL_L));
- vec4_instruction *inst = emit(CMP(dst_null_d(),
- this->vertex_count, 0u,
+ emit(CMP(dst_null_ud(), this->vertex_count,
+ brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L));
+ vec4_instruction *inst = emit(CMP(dst_null_ud(),
+ this->vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ));
inst->predicate = BRW_PREDICATE_NORMAL;
emit(IF(BRW_PREDICATE_NORMAL));
@@ -231,19 +232,19 @@ gen6_gs_visitor::gs_end_primitive()
* vertex.
*/
src_reg offset(this, glsl_type::uint_type);
- emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1)));
+ emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
src_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &offset, sizeof(src_reg));
- emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END)));
+ emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
/* Set the first vertex flag to indicate that the next vertex will start
* a primitive.
*/
- emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+ emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START)));
}
emit(BRW_OPCODE_ENDIF);
}
@@ -262,7 +263,8 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
*/
src_reg flags_offset(this, glsl_type::uint_type);
emit(ADD(dst_reg(flags_offset),
- this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));
+ this->vertex_output_offset,
+ brw_imm_d(prog_data->vue_map.num_slots)));
src_reg flags_data(this->vertex_output);
flags_data.reladdr = ralloc(mem_ctx, src_reg);
@@ -321,7 +323,7 @@ gen6_gs_visitor::emit_thread_end()
* points because in the point case we set PrimEnd on all vertices.
*/
if (nir->info.gs.output_primitive != GL_POINTS) {
- emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
+ emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_NORMAL));
gs_end_primitive();
emit(BRW_OPCODE_ENDIF);
@@ -347,7 +349,7 @@ gen6_gs_visitor::emit_thread_end()
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
- emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
+ emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G));
emit(IF(BRW_PREDICATE_NORMAL));
{
this->current_annotation = "gen6 thread end: ff_sync";
@@ -364,15 +366,15 @@ gen6_gs_visitor::emit_thread_end()
dst_reg(this->temp), this->prim_count, this->svbi);
} else {
inst = emit(GS_OPCODE_FF_SYNC,
- dst_reg(this->temp), this->prim_count, src_reg(0u));
+ dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
}
inst->base_mrf = base_mrf;
/* Loop over all buffered vertices and emit URB write messages */
this->current_annotation = "gen6 thread end: urb writes init";
src_reg vertex(this, glsl_type::uint_type);
- emit(MOV(dst_reg(vertex), 0u));
- emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+ emit(MOV(dst_reg(vertex), brw_imm_ud(0u)));
+ emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
this->current_annotation = "gen6 thread end: urb writes";
emit(BRW_OPCODE_DO);
@@ -416,7 +418,7 @@ gen6_gs_visitor::emit_thread_end()
mrf++;
emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ this->vertex_output_offset, brw_imm_ud(1u)));
/* If this was max_usable_mrf, we can't fit anything more into
* this URB WRITE. Same if we reached the max. message length.
@@ -437,9 +439,9 @@ gen6_gs_visitor::emit_thread_end()
* writing the next vertex.
*/
emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ this->vertex_output_offset, brw_imm_ud(1u)));
- emit(ADD(dst_reg(vertex), vertex, 1u));
+ emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u)));
}
emit(BRW_OPCODE_WHILE);
@@ -468,8 +470,8 @@ gen6_gs_visitor::emit_thread_end()
if (gs_prog_data->gen6_xfb_enabled) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_type::uint_type);
- emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
- emit(SHL(dst_reg(data), data, src_reg(16u)));
+ emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
+ emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
}
@@ -588,8 +590,8 @@ gen6_gs_visitor::xfb_write()
this->current_annotation = "gen6 thread end: svb writes init";
- emit(MOV(dst_reg(this->vertex_output_offset), 0u));
- emit(MOV(dst_reg(this->sol_prim_written), 0u));
+ emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
+ emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u)));
/* Check that at least one primitive can be written
*
@@ -600,7 +602,7 @@ gen6_gs_visitor::xfb_write()
* transform feedback is in interleaved or separate attribs mode.
*/
src_reg sol_temp(this, glsl_type::uvec4_type);
- emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));
+ emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
/* Compare SVBI calculated number with the maximum value, which is
* in R1.4 (previously saved in this->max_svbi) for gen6.
@@ -623,7 +625,7 @@ gen6_gs_visitor::xfb_write()
/* Write transform feedback data for all processed vertices. */
for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
- emit(MOV(dst_reg(sol_temp), i));
+ emit(MOV(dst_reg(sol_temp), brw_imm_d(i)));
emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
BRW_CONDITIONAL_L));
emit(IF(BRW_PREDICATE_NORMAL));
@@ -644,8 +646,8 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
/* Check for buffer overflow: we need room to write the complete primitive
* (all vertices). Otherwise, avoid writing any vertices for it
*/
- emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
- emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts)));
+ emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u)));
+ emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
emit(IF(BRW_PREDICATE_NORMAL));
@@ -683,7 +685,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
src_reg data(this->vertex_output);
data.reladdr = ralloc(mem_ctx, src_reg);
int offset = get_vertex_output_offset_for_varying(vertex, varying);
- emit(MOV(dst_reg(this->vertex_output_offset), offset));
+ emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset)));
memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
data.type = output_reg[varying].type;
@@ -710,9 +712,9 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
*/
emit(ADD(dst_reg(this->destination_indices),
this->destination_indices,
- src_reg(num_verts)));
+ brw_imm_ud(num_verts)));
emit(ADD(dst_reg(this->sol_prim_written),
- this->sol_prim_written, 1u));
+ this->sol_prim_written, brw_imm_ud(1u)));
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 9f4a5db3592..d508c4c9278 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo,
IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */
IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */
VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */
- 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */
- 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */
+ HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */
+ DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */
GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */
PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */
CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */
@@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+ case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+ case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
query->Base.Result = results[1] - results[0];
break;
@@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
query->Base.Result /= 4;
break;
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
default:
unreachable("Unrecognized query target in brw_queryobj_get_results()");
}
@@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+ case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+ case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0);
break;
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
default:
unreachable("Unrecognized query target in brw_begin_query()");
}
@@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
case GL_GEOMETRY_SHADER_INVOCATIONS:
+ case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+ case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
emit_pipeline_stat(brw, query->bo,
query->Base.Stream, query->Base.Target, 1);
break;
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
default:
unreachable("Unrecognized query target in brw_end_query()");
}
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 69162171c4e..161de77e156 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -118,7 +118,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
/* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
*
- * A PIPE_CONTOL command with the CS Stall bit set must be programmed
+ * A PIPE_CONTROL command with the CS Stall bit set must be programmed
* in the ring after this instruction.
*
* No such restriction exists for Haswell or Baytrail.
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 140a6544983..9cdd1c71b4d 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -187,7 +187,13 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t *surf)
{
- surf[7] |= mt->fast_clear_color_value;
+ if (brw->gen >= 9) {
+ surf[12] = mt->gen9_fast_clear_color.ui[0];
+ surf[13] = mt->gen9_fast_clear_color.ui[1];
+ surf[14] = mt->gen9_fast_clear_color.ui[2];
+ surf[15] = mt->gen9_fast_clear_color.ui[3];
+ } else
+ surf[7] |= mt->fast_clear_color_value;
}
static void
@@ -208,6 +214,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
+ const uint32_t surf_type = translate_tex_target(target);
if (mt->format == MESA_FORMAT_S_UINT8) {
tiling_mode = GEN8_SURFACE_TILING_W;
@@ -231,9 +238,14 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
*/
if (brw->gen >= 9 || mt->num_samples == 1)
assert(mt->halign == 16);
+
+ if (brw->gen >= 9) {
+ assert(mt->num_samples > 1 ||
+ brw_losslessly_compressible_format(brw, surf_type));
+ }
+
}
- const uint32_t surf_type = translate_tex_target(target);
uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) |
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index c00d2e786f3..f53c4ab518a 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -75,6 +75,10 @@ static const struct debug_control debug_control[] = {
{ "cs", DEBUG_CS },
{ "hex", DEBUG_HEX },
{ "nocompact", DEBUG_NO_COMPACTION },
+ { "hs", DEBUG_TCS },
+ { "tcs", DEBUG_TCS },
+ { "ds", DEBUG_TES },
+ { "tes", DEBUG_TES },
{ NULL, 0 }
};
@@ -83,8 +87,8 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
{
uint64_t flags[] = {
[MESA_SHADER_VERTEX] = DEBUG_VS,
- [MESA_SHADER_TESS_CTRL] = 0,
- [MESA_SHADER_TESS_EVAL] = 0,
+ [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
+ [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
[MESA_SHADER_GEOMETRY] = DEBUG_GS,
[MESA_SHADER_FRAGMENT] = DEBUG_WM,
[MESA_SHADER_COMPUTE] = DEBUG_CS,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 98bd7e93956..9c6030a6d7d 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -69,6 +69,8 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_CS (1ull << 33)
#define DEBUG_HEX (1ull << 34)
#define DEBUG_NO_COMPACTION (1ull << 35)
+#define DEBUG_TCS (1ull << 36)
+#define DEBUG_TES (1ull << 37)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 386b63c123d..2e2459c125b 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_texture_compression_bptc = true;
ctx->Extensions.ARB_texture_view = true;
ctx->Extensions.ARB_shader_storage_buffer_object = true;
+ ctx->Extensions.EXT_shader_samples_identical = true;
if (can_do_pipelined_register_writes(brw)) {
ctx->Extensions.ARB_draw_indirect = true;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b1a7632d82f..87e01366932 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -35,6 +35,7 @@
#include "brw_blorp.h"
#include "brw_context.h"
+#include "brw_state.h"
#include "main/enums.h"
#include "main/fbobject.h"
@@ -192,6 +193,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
*
* - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
* 64bpp, and 128bpp.
+ *
+ * From the Skylake documentation, it is made clear that X-tiling is no longer
+ * supported:
+ *
+ * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
+ * non-MSRTs only.
*/
static bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
@@ -201,14 +208,6 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
if (brw->gen < 7)
return false;
- if (brw->gen >= 9) {
- /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU
- * FINISHME: hangs are resolved.
- */
- perf_debug("singlesample fast MCS clears disabled on gen9");
- return false;
- }
-
if (mt->disable_aux_buffers)
return false;
@@ -259,7 +258,11 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
if (!brw->format_supported_as_render_target[mt->format])
return false;
- return true;
+ if (brw->gen >= 9) {
+ const uint32_t brw_format = brw_format_for_mesa_format(mt->format);
+ return brw_losslessly_compressible_format(brw, brw_format);
+ } else
+ return true;
}
@@ -1495,6 +1498,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
unsigned width_divisor = block_width_px * 4;
unsigned height_divisor = block_height * 8;
+
+ /* The Skylake MCS is twice as tall as the Broadwell MCS.
+ *
+ * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines
+ * in the main surface. In Skylake, it's two bits. The extra bit
+ * doubles the MCS height, not width, because in Skylake the MCS is always
+ * Y-tiled.
+ */
+ if (brw->gen >= 9)
+ height_divisor /= 2;
+
unsigned mcs_width =
ALIGN(mt->logical_width0, width_divisor) / width_divisor;
unsigned mcs_height =
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 805cd714d88..64f73ea9ae5 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -633,15 +633,22 @@ struct intel_mipmap_tree
* The SURFACE_STATE bits associated with the last fast color clear to this
* color mipmap tree, if any.
*
- * This value will only ever contain ones in bits 28-31, so it is safe to
- * OR into dword 7 of SURFACE_STATE.
+ * Prior to GEN9 there is a single bit for RGBA clear values which gives you
+ * the option of 2^4 clear colors. Each bit determines if the color channel
+ * is fully saturated or unsaturated (Cherryview does add a 32b value per
+ * channel, but it is globally applied instead of being part of the render
+ * surface state). Starting with GEN9, the surface state accepts a 32b value
+ * for each color channel.
*
* @see RENDER_SURFACE_STATE.RedClearColor
* @see RENDER_SURFACE_STATE.GreenClearColor
* @see RENDER_SURFACE_STATE.BlueClearColor
* @see RENDER_SURFACE_STATE.AlphaClearColor
*/
- uint32_t fast_clear_color_value;
+ union {
+ uint32_t fast_clear_color_value;
+ union gl_color_union gen9_fast_clear_color;
+ };
/**
* Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 62d39f70ec4..034d8a507fe 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -107,7 +107,7 @@ TEST_F(cmod_propagation_test, basic)
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
@@ -139,7 +139,7 @@ TEST_F(cmod_propagation_test, cmp_nonzero)
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- fs_reg nonzero(1.0f);
+ fs_reg nonzero(brw_imm_f(1.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
@@ -171,7 +171,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::uint_type);
fs_reg src0 = v->vgrf(glsl_type::uint_type);
- fs_reg zero(0u);
+ fs_reg zero(brw_imm_ud(0u));
bld.FBL(dest, src0);
bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
@@ -205,7 +205,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
@@ -244,7 +244,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest0, src0, src1);
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
@@ -282,7 +282,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
->regs_written = 4;
@@ -323,7 +323,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
@@ -360,7 +360,7 @@ TEST_F(cmod_propagation_test, negate)
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
dest.negate = true;
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
@@ -425,7 +425,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::int_type);
fs_reg src1 = v->vgrf(glsl_type::int_type);
- fs_reg zero(0.0f);
+ fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero,
BRW_CONDITIONAL_GE);
@@ -458,8 +458,8 @@ TEST_F(cmod_propagation_test, andnz_one)
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
- fs_reg one(1);
+ fs_reg zero(brw_imm_f(0.0f));
+ fs_reg one(brw_imm_d(1));
bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_NZ,
@@ -493,8 +493,8 @@ TEST_F(cmod_propagation_test, andnz_non_one)
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
- fs_reg nonone(38);
+ fs_reg zero(brw_imm_f(0.0f));
+ fs_reg nonone(brw_imm_d(38));
bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_NZ,
@@ -528,8 +528,8 @@ TEST_F(cmod_propagation_test, andz_one)
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
- fs_reg zero(0.0f);
- fs_reg one(1);
+ fs_reg zero(brw_imm_f(0.0f));
+ fs_reg one(brw_imm_d(1));
bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_Z,
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index 9aa2fcc7907..e5e566c60bc 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -145,7 +145,7 @@ TEST_F(cmod_propagation_test, basic)
dst_reg dest = dst_reg(v, glsl_type::float_type);
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
dst_reg dest_null = bld.null_reg_f();
dest_null.writemask = WRITEMASK_X;
@@ -181,7 +181,7 @@ TEST_F(cmod_propagation_test, basic_different_dst_writemask)
dst_reg dest = dst_reg(v, glsl_type::float_type);
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
dst_reg dest_null = bld.null_reg_f();
bld.ADD(dest, src0, src1);
@@ -217,8 +217,8 @@ TEST_F(cmod_propagation_test, andz_one)
const vec4_builder bld = vec4_builder(v).at_end();
dst_reg dest = dst_reg(v, glsl_type::int_type);
src_reg src0 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
- src_reg one(1);
+ src_reg zero(brw_imm_f(0.0f));
+ src_reg one(brw_imm_d(1));
bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_Z,
@@ -253,7 +253,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
const vec4_builder bld = vec4_builder(v).at_end();
dst_reg dest = dst_reg(v, glsl_type::uint_type);
src_reg src0 = src_reg(v, glsl_type::uint_type);
- src_reg zero(0u);
+ src_reg zero(brw_imm_ud(0u));
bld.FBL(dest, src0);
bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
@@ -288,7 +288,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
src_reg src2 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
@@ -328,7 +328,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
src_reg src2 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.ADD(dest0, src0, src1);
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
@@ -367,7 +367,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
src_reg src2 = src_reg(v, glsl_type::vec2_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.ADD(offset(dest, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
->regs_written = 4;
@@ -409,7 +409,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
src_reg src2 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
dst_reg dest_null = bld.null_reg_f();
dest_null.writemask = WRITEMASK_X;
@@ -449,7 +449,7 @@ TEST_F(cmod_propagation_test, negate)
dst_reg dest = dst_reg(v, glsl_type::float_type);
src_reg src0 = src_reg(v, glsl_type::float_type);
src_reg src1 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
src_reg tmp_src = src_reg(dest);
tmp_src.negate = true;
@@ -521,7 +521,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
dst_reg dest = dst_reg(v, glsl_type::int_type);
src_reg src0 = src_reg(v, glsl_type::int_type);
src_reg src1 = src_reg(v, glsl_type::int_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
BRW_CONDITIONAL_GE);
@@ -555,8 +555,8 @@ TEST_F(cmod_propagation_test, andnz_non_one)
const vec4_builder bld = vec4_builder(v).at_end();
dst_reg dest = dst_reg(v, glsl_type::int_type);
src_reg src0 = src_reg(v, glsl_type::float_type);
- src_reg zero(0.0f);
- src_reg nonone(38);
+ src_reg zero(brw_imm_f(0.0f));
+ src_reg nonone(brw_imm_d(38));
bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_NZ,
@@ -594,7 +594,7 @@ TEST_F(cmod_propagation_test, basic_vec4)
dst_reg dest = dst_reg(v, glsl_type::vec4_type);
src_reg src0 = src_reg(v, glsl_type::vec4_type);
src_reg src1 = src_reg(v, glsl_type::vec4_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
bld.MUL(dest, src0, src1);
bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
@@ -628,7 +628,7 @@ TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask)
dest.writemask = WRITEMASK_X;
src_reg src0 = src_reg(v, glsl_type::vec4_type);
src_reg src1 = src_reg(v, glsl_type::vec4_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
dst_reg dest_null = bld.null_reg_f();
bld.MUL(dest, src0, src1);
@@ -668,7 +668,7 @@ TEST_F(cmod_propagation_test, mad_one_component_vec4)
src_reg src2 = src_reg(v, glsl_type::vec4_type);
src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
src2.negate = true;
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
src_reg tmp(dest);
tmp.swizzle = BRW_SWIZZLE_XXXX;
dst_reg dest_null = bld.null_reg_f();
@@ -710,7 +710,7 @@ TEST_F(cmod_propagation_test, mad_more_one_component_vec4)
src_reg src2 = src_reg(v, glsl_type::vec4_type);
src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
src2.negate = true;
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
src_reg tmp(dest);
tmp.swizzle = BRW_SWIZZLE_XXXX;
dst_reg dest_null = bld.null_reg_f();
@@ -751,7 +751,7 @@ TEST_F(cmod_propagation_test, cmp_mov_vec4)
src_reg src0 = src_reg(v, glsl_type::ivec4_type);
src0.swizzle = BRW_SWIZZLE_XXXX;
src0.file = UNIFORM;
- src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D);
+ src_reg nonone = retype(brw_imm_d(16), BRW_REGISTER_TYPE_D);
src_reg mov_src = src_reg(dest);
mov_src.swizzle = BRW_SWIZZLE_XXXX;
dst_reg dest_null = bld.null_reg_d();
@@ -790,7 +790,7 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
dst_reg dest = dst_reg(v, glsl_type::vec4_type);
src_reg src0 = src_reg(v, glsl_type::vec4_type);
src_reg src1 = src_reg(v, glsl_type::vec4_type);
- src_reg zero(0.0f);
+ src_reg zero(brw_imm_f(0.0f));
src_reg cmp_src = src_reg(dest);
cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index a1f91d9c56a..ede409b6919 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -162,7 +162,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask)
SWIZZLE_X,
SWIZZLE_Z))));
- v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), src_reg(1.0f)));
+ v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f)));
vec4_instruction *test_mov =
v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(SWIZZLE_W,
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index d84e2e98ec0..90a6bc3618f 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -135,7 +135,7 @@ TEST_F(register_coalesce_test, test_compute_to_mrf)
m0.writemask = WRITEMASK_X;
m0.type = BRW_REGISTER_TYPE_F;
- vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
+ vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
v->emit(v->MOV(m0, src_reg(temp)));
register_coalesce(v);
@@ -159,7 +159,7 @@ TEST_F(register_coalesce_test, test_multiple_use)
m1.type = BRW_REGISTER_TYPE_F;
src_reg src = src_reg(temp);
- vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
+ vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
src.swizzle = BRW_SWIZZLE_XXXX;
v->emit(v->MOV(m0, src));
src.swizzle = BRW_SWIZZLE_XYZW;
diff --git a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
index 6a8bceabf16..7af97d0d097 100644
--- a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
+++ b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
@@ -40,15 +40,10 @@ void vf_float_conversion_test::SetUp() {
int ebits = (vf >> 4) & 0x7;
int mbits = vf & 0xf;
- int e = ebits - 3;
+ float x = 1.0f + mbits / 16.0f;
+ int exp = ebits - 3;
- float value = 1.0f;
-
- value += mbits / 16.0f;
-
- value *= exp2f(e);
-
- vf_to_float[vf] = value;
+ vf_to_float[vf] = ldexpf(x, exp);
}
}
@@ -98,3 +93,18 @@ TEST_F(vf_float_conversion_test, test_special_case_0)
EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(+0.0f))), f2u(+0.0f));
EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(-0.0f))), f2u(-0.0f));
}
+
+TEST_F(vf_float_conversion_test, test_nonrepresentable_float_input)
+{
+ EXPECT_EQ(brw_float_to_vf(+32.0f), -1);
+ EXPECT_EQ(brw_float_to_vf(-32.0f), -1);
+
+ EXPECT_EQ(brw_float_to_vf(+16.5f), -1);
+ EXPECT_EQ(brw_float_to_vf(-16.5f), -1);
+
+ EXPECT_EQ(brw_float_to_vf(+8.25f), -1);
+ EXPECT_EQ(brw_float_to_vf(-8.25f), -1);
+
+ EXPECT_EQ(brw_float_to_vf(+4.125f), -1);
+ EXPECT_EQ(brw_float_to_vf(-4.125f), -1);
+}
diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
index a8c31b741ed..14f5e71fadf 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.h
+++ b/src/mesa/drivers/dri/r200/r200_tex.h
@@ -63,7 +63,9 @@ static const struct tx_table tx_table_be[] =
[ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
[ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
[ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
@@ -91,7 +93,9 @@ static const struct tx_table tx_table_le[] =
[ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
[ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
index f8ec432755a..37c2fa0dc2f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -63,6 +63,8 @@ static const struct tx_table tx_table[] =
[ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8X8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 },
+ [ MESA_FORMAT_X8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
[ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
[ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index ddf7f497f1e..2ae22e9e691 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -67,7 +67,7 @@ legal_src_factor(const struct gl_context *ctx, GLenum factor)
case GL_SRC1_ALPHA:
case GL_ONE_MINUS_SRC1_COLOR:
case GL_ONE_MINUS_SRC1_ALPHA:
- return _mesa_is_desktop_gl(ctx)
+ return ctx->API != API_OPENGLES
&& ctx->Extensions.ARB_blend_func_extended;
default:
return GL_FALSE;
@@ -100,14 +100,14 @@ legal_dst_factor(const struct gl_context *ctx, GLenum factor)
case GL_ONE_MINUS_CONSTANT_ALPHA:
return _mesa_is_desktop_gl(ctx) || ctx->API == API_OPENGLES2;
case GL_SRC_ALPHA_SATURATE:
- return (_mesa_is_desktop_gl(ctx)
+ return (ctx->API != API_OPENGLES
&& ctx->Extensions.ARB_blend_func_extended)
|| _mesa_is_gles3(ctx);
case GL_SRC1_COLOR:
case GL_SRC1_ALPHA:
case GL_ONE_MINUS_SRC1_COLOR:
case GL_ONE_MINUS_SRC1_ALPHA:
- return _mesa_is_desktop_gl(ctx)
+ return ctx->API != API_OPENGLES
&& ctx->Extensions.ARB_blend_func_extended;
default:
return GL_FALSE;
@@ -404,7 +404,7 @@ _mesa_BlendEquation( GLenum mode )
ctx->Color._BlendEquationPerBuffer = GL_FALSE;
if (ctx->Driver.BlendEquationSeparate)
- (*ctx->Driver.BlendEquationSeparate)( ctx, mode, mode );
+ ctx->Driver.BlendEquationSeparate(ctx, mode, mode);
}
@@ -582,7 +582,7 @@ _mesa_BlendColor( GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha )
ctx->Color.BlendColor[3] = CLAMP(tmp[3], 0.0F, 1.0F);
if (ctx->Driver.BlendColor)
- (*ctx->Driver.BlendColor)(ctx, ctx->Color.BlendColor);
+ ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor);
}
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 93588a2ee18..83e238ae825 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -731,7 +731,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
/* Call the device driver function only if fb is the bound read buffer */
if (fb == ctx->ReadBuffer) {
if (ctx->Driver.ReadBuffer)
- (*ctx->Driver.ReadBuffer)(ctx, buffer);
+ ctx->Driver.ReadBuffer(ctx, buffer);
}
}
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 42f67990784..a8a667e3c12 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -369,10 +369,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
break;
case GL_DEBUG_OUTPUT:
case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
- if (!_mesa_is_desktop_gl(ctx))
- goto invalid_enum_error;
- else
- _mesa_set_debug_state_int(ctx, cap, state);
+ _mesa_set_debug_state_int(ctx, cap, state);
break;
case GL_DITHER:
if (ctx->Color.DitherFlag == state)
@@ -1225,10 +1222,7 @@ _mesa_IsEnabled( GLenum cap )
return ctx->Polygon.CullFlag;
case GL_DEBUG_OUTPUT:
case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
- if (!_mesa_is_desktop_gl(ctx))
- goto invalid_enum_error;
- else
- return (GLboolean) _mesa_get_debug_state_int(ctx, cap);
+ return (GLboolean) _mesa_get_debug_state_int(ctx, cap);
case GL_DEPTH_TEST:
return ctx->Depth.Test;
case GL_DITHER:
diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index f720de316e4..366b119aba3 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -978,9 +978,13 @@ _mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
GLenum severity, GLint length,
const GLchar *buf)
{
- const char *callerstr = "glDebugMessageInsert";
-
GET_CURRENT_CONTEXT(ctx);
+ const char *callerstr;
+
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glDebugMessageInsert";
+ else
+ callerstr = "glDebugMessageInsertKHR";
if (!validate_params(ctx, INSERT, callerstr, source, type, severity))
return; /* GL_INVALID_ENUM */
@@ -1004,15 +1008,21 @@ _mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources,
{
GET_CURRENT_CONTEXT(ctx);
struct gl_debug_state *debug;
+ const char *callerstr;
GLuint ret;
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glGetDebugMessageLog";
+ else
+ callerstr = "glGetDebugMessageLogKHR";
+
if (!messageLog)
logSize = 0;
if (logSize < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "glGetDebugMessageLog(logSize=%d : logSize must not be"
- " negative)", logSize);
+ "%s(logSize=%d : logSize must not be negative)",
+ callerstr, logSize);
return 0;
}
@@ -1066,9 +1076,14 @@ _mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type,
enum mesa_debug_source source = gl_enum_to_debug_source(gl_source);
enum mesa_debug_type type = gl_enum_to_debug_type(gl_type);
enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity);
- const char *callerstr = "glDebugMessageControl";
+ const char *callerstr;
struct gl_debug_state *debug;
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glDebugMessageControl";
+ else
+ callerstr = "glDebugMessageControlKHR";
+
if (count < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"%s(count=%d : count must not be negative)", callerstr,
@@ -1124,10 +1139,15 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
const GLchar *message)
{
GET_CURRENT_CONTEXT(ctx);
- const char *callerstr = "glPushDebugGroup";
+ const char *callerstr;
struct gl_debug_state *debug;
struct gl_debug_message *emptySlot;
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glPushDebugGroup";
+ else
+ callerstr = "glPushDebugGroupKHR";
+
switch(source) {
case GL_DEBUG_SOURCE_APPLICATION:
case GL_DEBUG_SOURCE_THIRD_PARTY:
@@ -1176,10 +1196,15 @@ void GLAPIENTRY
_mesa_PopDebugGroup(void)
{
GET_CURRENT_CONTEXT(ctx);
- const char *callerstr = "glPopDebugGroup";
+ const char *callerstr;
struct gl_debug_state *debug;
struct gl_debug_message *gdmessage, msg;
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glPopDebugGroup";
+ else
+ callerstr = "glPopDebugGroupKHR";
+
debug = _mesa_lock_debug_state(ctx);
if (!debug)
return;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index e94d2b74749..fa50cb68cca 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -40,7 +40,6 @@
struct gl_extensions _mesa_extension_override_enables;
struct gl_extensions _mesa_extension_override_disables;
static char *extra_extensions = NULL;
-static char *cant_disable_extensions = NULL;
/**
@@ -68,29 +67,30 @@ const struct mesa_extension _mesa_extension_table[] = {
#undef EXT
};
+static bool disabled_extensions[ARRAY_SIZE(_mesa_extension_table)];
/**
* Given an extension name, lookup up the corresponding member of struct
- * gl_extensions and return that member's offset (in bytes). If the name is
- * not found in the \c _mesa_extension_table, return 0.
+ * gl_extensions and return that member's index. If the name is
+ * not found in the \c _mesa_extension_table, return -1.
*
* \param name Name of extension.
- * \return Offset of member in struct gl_extensions.
+ * \return Index of member in struct gl_extensions.
*/
-static size_t
-name_to_offset(const char* name)
+static int
+name_to_index(const char* name)
{
unsigned i;
if (name == 0)
- return 0;
+ return -1;
for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
if (strcmp(name, _mesa_extension_table[i].name) == 0)
- return _mesa_extension_table[i].offset;
+ return i;
}
- return 0;
+ return -1;
}
/**
@@ -206,11 +206,11 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
* \return offset of extensions withint `ext' or 0 if extension is not known
*/
static size_t
-set_extension(struct gl_extensions *ext, const char *name, GLboolean state)
+set_extension(struct gl_extensions *ext, int i, GLboolean state)
{
size_t offset;
- offset = name_to_offset(name);
+ offset = i < 0 ? 0 : _mesa_extension_table[i].offset;
if (offset != 0 && (offset != o(dummy_true) || state != GL_FALSE)) {
((GLboolean *) ext)[offset] = state;
}
@@ -240,12 +240,6 @@ get_extension_override( struct gl_context *ctx )
{
override_extensions_in_context(ctx);
- if (cant_disable_extensions != NULL) {
- _mesa_problem(ctx,
- "Trying to disable permanently enabled extensions: %s",
- cant_disable_extensions);
- }
-
if (extra_extensions == NULL) {
return calloc(1, sizeof(char));
} else {
@@ -257,7 +251,7 @@ get_extension_override( struct gl_context *ctx )
/**
- * \brief Free extra_extensions and cant_disable_extensions strings
+ * \brief Free extra_extensions string
*
* These strings are allocated early during the first context creation by
* _mesa_one_time_init_extension_overrides.
@@ -266,7 +260,6 @@ static void
free_unknown_extensions_strings(void)
{
free(extra_extensions);
- free(cant_disable_extensions);
}
@@ -295,21 +288,20 @@ _mesa_one_time_init_extension_overrides(void)
/* extra_exts: List of unrecognized extensions. */
extra_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char));
- cant_disable_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char));
/* Copy env_const because strtok() is destructive. */
env = strdup(env_const);
- if (env == NULL || extra_extensions == NULL ||
- cant_disable_extensions == NULL) {
- free(env);
- free(extra_extensions);
- free(cant_disable_extensions);
- return;
+ if (env == NULL ||
+ extra_extensions == NULL) {
+ free(env);
+ free(extra_extensions);
+ return;
}
for (ext = strtok(env, " "); ext != NULL; ext = strtok(NULL, " ")) {
int enable;
+ int i;
bool recognized;
switch (ext[0]) {
case '+':
@@ -325,7 +317,8 @@ _mesa_one_time_init_extension_overrides(void)
break;
}
- offset = set_extension(&_mesa_extension_override_enables, ext, enable);
+ i = name_to_index(ext);
+ offset = set_extension(&_mesa_extension_override_enables, i, enable);
if (offset != 0 && (offset != o(dummy_true) || enable != GL_FALSE)) {
((GLboolean *) &_mesa_extension_override_disables)[offset] = !enable;
recognized = true;
@@ -333,14 +326,12 @@ _mesa_one_time_init_extension_overrides(void)
recognized = false;
}
- if (!recognized) {
- if (enable) {
- strcat(extra_extensions, ext);
- strcat(extra_extensions, " ");
- } else if (offset == o(dummy_true)) {
- strcat(cant_disable_extensions, ext);
- strcat(cant_disable_extensions, " ");
- }
+ if (i >= 0)
+ disabled_extensions[i] = !enable;
+
+ if (!recognized && enable) {
+ strcat(extra_extensions, ext);
+ strcat(extra_extensions, " ");
}
}
@@ -354,13 +345,6 @@ _mesa_one_time_init_extension_overrides(void)
} else if (extra_extensions[len - 1] == ' ') {
extra_extensions[len - 1] = '\0';
}
- len = strlen(cant_disable_extensions);
- if (len == 0) {
- free(cant_disable_extensions);
- cant_disable_extensions = NULL;
- } else if (cant_disable_extensions[len - 1] == ' ') {
- cant_disable_extensions[len - 1] = '\0';
- }
}
@@ -401,7 +385,8 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
const bool *base = (bool *) &ctx->Extensions;
const struct mesa_extension *ext = _mesa_extension_table + i;
- return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
+ return !disabled_extensions[i] &&
+ (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
}
/**
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d12fd9f1c8d..051d69a3613 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -1,8 +1,31 @@
+/* The extension table is alphabetically sorted by the extension name string column. */
+
#define GLL 0
#define GLC 0
#define ES1 0
#define ES2 0
#define x ~0
+
+EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999)
+
+EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009)
+EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009)
+EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007)
+EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013)
+EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009)
+EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009)
+EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012)
+EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012)
+EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012)
+
+EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
+EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
+
+EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006)
+EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002)
+EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009)
+EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002)
+
EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009)
EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012)
EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012)
@@ -16,9 +39,9 @@ EXT(ARB_color_buffer_float , ARB_color_buffer_float
EXT(ARB_compressed_texture_pixel_storage , dummy_true , GLL, GLC, x , x , 2011)
EXT(ARB_compute_shader , ARB_compute_shader , GLL, GLC, x , x , 2012)
EXT(ARB_conditional_render_inverted , ARB_conditional_render_inverted , GLL, GLC, x , x , 2014)
+EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011)
EXT(ARB_copy_buffer , dummy_true , GLL, GLC, x , x , 2008)
EXT(ARB_copy_image , ARB_copy_image , GLL, GLC, x , x , 2012)
-EXT(ARB_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2011)
EXT(ARB_debug_output , dummy_true , GLL, GLC, x , x , 2009)
EXT(ARB_depth_buffer_float , ARB_depth_buffer_float , GLL, GLC, x , x , 2008)
EXT(ARB_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2003)
@@ -56,8 +79,8 @@ EXT(ARB_multi_bind , dummy_true
EXT(ARB_multi_draw_indirect , ARB_draw_indirect , x , GLC, x , x , 2012)
EXT(ARB_multisample , dummy_true , GLL, x , x , x , 1994)
EXT(ARB_multitexture , dummy_true , GLL, x , x , x , 1998)
-EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003)
EXT(ARB_occlusion_query , ARB_occlusion_query , GLL, x , x , x , 2001)
+EXT(ARB_occlusion_query2 , ARB_occlusion_query2 , GLL, GLC, x , x , 2003)
EXT(ARB_pipeline_statistics_query , ARB_pipeline_statistics_query , GLL, GLC, x , x , 2014)
EXT(ARB_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004)
EXT(ARB_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997)
@@ -83,13 +106,13 @@ EXT(ARB_shader_subroutine , ARB_shader_subroutine
EXT(ARB_shader_texture_image_samples , ARB_shader_texture_image_samples , GLL, GLC, x , x , 2014)
EXT(ARB_shader_texture_lod , ARB_shader_texture_lod , GLL, GLC, x , x , 2009)
EXT(ARB_shading_language_100 , dummy_true , GLL, x , x , x , 2003)
-EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011)
EXT(ARB_shading_language_420pack , ARB_shading_language_420pack , GLL, GLC, x , x , 2011)
+EXT(ARB_shading_language_packing , ARB_shading_language_packing , GLL, GLC, x , x , 2011)
EXT(ARB_shadow , ARB_shadow , GLL, x , x , x , 2001)
EXT(ARB_stencil_texturing , ARB_stencil_texturing , GLL, GLC, x , x , 2012)
EXT(ARB_sync , ARB_sync , GLL, GLC, x , x , 2003)
-EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014)
EXT(ARB_tessellation_shader , ARB_tessellation_shader , x , GLC, x , x , 2009)
+EXT(ARB_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2014)
EXT(ARB_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 2000)
EXT(ARB_texture_buffer_object , ARB_texture_buffer_object , x , GLC, x , x , 2008)
EXT(ARB_texture_buffer_object_rgb32 , ARB_texture_buffer_object_rgb32 , x , GLC, x , x , 2009)
@@ -105,20 +128,20 @@ EXT(ARB_texture_env_crossbar , ARB_texture_env_crossbar
EXT(ARB_texture_env_dot3 , ARB_texture_env_dot3 , GLL, x , x , x , 2001)
EXT(ARB_texture_float , ARB_texture_float , GLL, GLC, x , x , 2004)
EXT(ARB_texture_gather , ARB_texture_gather , GLL, GLC, x , x , 2009)
-EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001)
EXT(ARB_texture_mirror_clamp_to_edge , ARB_texture_mirror_clamp_to_edge , GLL, GLC, x , x , 2013)
+EXT(ARB_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 2001)
EXT(ARB_texture_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2009)
EXT(ARB_texture_non_power_of_two , ARB_texture_non_power_of_two , GLL, GLC, x , x , 2003)
EXT(ARB_texture_query_levels , ARB_texture_query_levels , GLL, GLC, x , x , 2012)
EXT(ARB_texture_query_lod , ARB_texture_query_lod , GLL, GLC, x , x , 2009)
EXT(ARB_texture_rectangle , NV_texture_rectangle , GLL, GLC, x , x , 2004)
-EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009)
EXT(ARB_texture_rg , ARB_texture_rg , GLL, GLC, x , x , 2008)
+EXT(ARB_texture_rgb10_a2ui , ARB_texture_rgb10_a2ui , GLL, GLC, x , x , 2009)
EXT(ARB_texture_stencil8 , ARB_texture_stencil8 , GLL, GLC, x , x , 2013)
EXT(ARB_texture_storage , dummy_true , GLL, GLC, x , x , 2011)
EXT(ARB_texture_storage_multisample , ARB_texture_multisample , GLL, GLC, x , x , 2012)
-EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012)
EXT(ARB_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008)
+EXT(ARB_texture_view , ARB_texture_view , GLL, GLC, x , x , 2012)
EXT(ARB_timer_query , ARB_timer_query , GLL, GLC, x , x , 2010)
EXT(ARB_transform_feedback2 , ARB_transform_feedback2 , GLL, GLC, x , x , 2010)
EXT(ARB_transform_feedback3 , ARB_transform_feedback3 , GLL, GLC, x , x , 2010)
@@ -127,28 +150,39 @@ EXT(ARB_transpose_matrix , dummy_true
EXT(ARB_uniform_buffer_object , ARB_uniform_buffer_object , GLL, GLC, x , x , 2009)
EXT(ARB_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008)
EXT(ARB_vertex_array_object , dummy_true , GLL, GLC, x , x , 2006)
+EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010)
EXT(ARB_vertex_attrib_binding , dummy_true , GLL, GLC, x , x , 2012)
EXT(ARB_vertex_buffer_object , dummy_true , GLL, x , x , x , 2003)
EXT(ARB_vertex_program , ARB_vertex_program , GLL, x , x , x , 2002)
EXT(ARB_vertex_shader , ARB_vertex_shader , GLL, GLC, x , x , 2002)
-EXT(ARB_vertex_attrib_64bit , ARB_vertex_attrib_64bit , x , GLC, x , x , 2010)
EXT(ARB_vertex_type_10f_11f_11f_rev , ARB_vertex_type_10f_11f_11f_rev , GLL, GLC, x , x , 2013)
EXT(ARB_vertex_type_2_10_10_10_rev , ARB_vertex_type_2_10_10_10_rev , GLL, GLC, x , x , 2009)
EXT(ARB_viewport_array , ARB_viewport_array , x , GLC, x , x , 2010)
EXT(ARB_window_pos , dummy_true , GLL, x , x , x , 2001)
+EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
+EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
+EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
+EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
+EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
+EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
+EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002)
+EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006)
+
EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995)
EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995)
EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995)
EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
+EXT(EXT_blend_func_extended , ARB_blend_func_extended , x , x , x , ES2, 2015)
EXT(EXT_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
-EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015)
-EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009)
EXT(EXT_blend_minmax , EXT_blend_minmax , GLL, x , ES1, ES2, 1995)
EXT(EXT_blend_subtract , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_buffer_storage , ARB_buffer_storage , x , x , x , 31, 2015)
+EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013)
EXT(EXT_compiled_vertex_array , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_copy_texture , dummy_true , GLL, x , x , x , 1995)
EXT(EXT_depth_bounds_test , EXT_depth_bounds_test , GLL, GLC, x , x , 2002)
+EXT(EXT_discard_framebuffer , dummy_true , x , x , ES1, ES2, 2009)
EXT(EXT_draw_buffers , dummy_true , x , x , x , ES2, 2012)
EXT(EXT_draw_buffers2 , EXT_draw_buffers2 , GLL, GLC, x , x , 2006)
EXT(EXT_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
@@ -172,20 +206,21 @@ EXT(EXT_point_parameters , EXT_point_parameters
EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995)
EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014)
EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009)
EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997)
EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999)
EXT(EXT_separate_shader_objects , dummy_true , x , x , x , ES2, 2013)
EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997)
-EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, ES1, 30, 2013)
+EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013)
+EXT(EXT_shader_samples_identical , EXT_shader_samples_identical , GLL, GLC, x , 31, 2015)
EXT(EXT_shadow_funcs , ARB_shadow , GLL, x , x , x , 2002)
EXT(EXT_stencil_two_side , EXT_stencil_two_side , GLL, x , x , x , 2001)
EXT(EXT_stencil_wrap , dummy_true , GLL, x , x , x , 2002)
EXT(EXT_subtexture , dummy_true , GLL, x , x , x , 1995)
+EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006)
EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004)
-EXT(ANGLE_texture_compression_dxt3 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
-EXT(ANGLE_texture_compression_dxt5 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2011)
EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006)
EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004)
EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , x , 2000)
@@ -196,28 +231,66 @@ EXT(EXT_texture_env_combine , dummy_true
EXT(EXT_texture_env_dot3 , EXT_texture_env_dot3 , GLL, x , x , x , 2000)
EXT(EXT_texture_filter_anisotropic , EXT_texture_filter_anisotropic , GLL, GLC, ES1, ES2, 1999)
EXT(EXT_texture_format_BGRA8888 , dummy_true , x , x , ES1, ES2, 2005)
-EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011)
-EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009)
EXT(EXT_texture_integer , EXT_texture_integer , GLL, GLC, x , x , 2006)
EXT(EXT_texture_lod_bias , dummy_true , GLL, x , ES1, x , 1999)
EXT(EXT_texture_mirror_clamp , EXT_texture_mirror_clamp , GLL, GLC, x , x , 2004)
EXT(EXT_texture_object , dummy_true , GLL, x , x , x , 1995)
-EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996)
EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004)
-EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004)
-EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009)
+EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011)
EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004)
EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , x , 2006)
+EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004)
+EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009)
EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008)
EXT(EXT_texture_type_2_10_10_10_REV , dummy_true , x , x , x , ES2, 2008)
EXT(EXT_timer_query , EXT_timer_query , GLL, GLC, x , x , 2006)
EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011)
EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011)
-EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008)
EXT(EXT_vertex_array , dummy_true , GLL, x , x , x , 1995)
-EXT(EXT_color_buffer_float , dummy_true , x , x , ES1, 30, 2013)
+EXT(EXT_vertex_array_bgra , EXT_vertex_array_bgra , GLL, GLC, x , x , 2008)
+EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998)
+EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996)
+EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998)
+EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
+
+EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
+
+EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014)
+EXT(KHR_debug , dummy_true , GLL, GLC, ES1, ES2, 2012)
+EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012)
+EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012)
+
+EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002)
+EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009)
+EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000)
+EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002)
+
+EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008)
+EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
+EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010)
+EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001)
+EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005)
+EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000)
+EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001)
+EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002)
+EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011)
+EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999)
+EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009)
+EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
+EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
+EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
+
+EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
+EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010)
EXT(OES_blend_equation_separate , EXT_blend_equation_separate , x , x , ES1, x , 2009)
EXT(OES_blend_func_separate , EXT_blend_func_separate , x , x , ES1, x , 2009)
EXT(OES_blend_subtract , dummy_true , x , x , ES1, x , 2009)
@@ -230,9 +303,6 @@ EXT(OES_depth_texture , ARB_depth_texture
EXT(OES_depth_texture_cube_map , OES_depth_texture_cube_map , x , x , x , ES2, 2012)
EXT(OES_draw_elements_base_vertex , ARB_draw_elements_base_vertex , x , x , x , ES2, 2014)
EXT(OES_draw_texture , OES_draw_texture , x , x , ES1, x , 2004)
-EXT(OES_EGL_sync , dummy_true , x , x , ES1, ES2, 2010)
-EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
EXT(OES_element_index_uint , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002)
@@ -260,73 +330,17 @@ EXT(OES_texture_float_linear , OES_texture_float_linear
EXT(OES_texture_half_float , OES_texture_half_float , x , x , x , ES2, 2005)
EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005)
EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005)
-EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014)
EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014)
EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010)
-
-EXT(KHR_debug , dummy_true , GLL, GLC, x , x , 2012)
-EXT(KHR_context_flush_control , dummy_true , GLL, GLC, x , ES2, 2014)
-EXT(KHR_texture_compression_astc_hdr , KHR_texture_compression_astc_hdr , GLL, GLC, x , ES2, 2012)
-EXT(KHR_texture_compression_astc_ldr , KHR_texture_compression_astc_ldr , GLL, GLC, x , ES2, 2012)
-
-
-EXT(3DFX_texture_compression_FXT1 , TDFX_texture_compression_FXT1 , GLL, GLC, x , x , 1999)
-EXT(AMD_conservative_depth , ARB_conservative_depth , GLL, GLC, x , x , 2009)
-EXT(AMD_draw_buffers_blend , ARB_draw_buffers_blend , GLL, GLC, x , x , 2009)
-EXT(AMD_performance_monitor , AMD_performance_monitor , GLL, GLC, x , x , 2007)
-EXT(AMD_pinned_memory , AMD_pinned_memory , GLL, GLC, x , x , 2013)
-EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009)
-EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009)
-EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012)
-EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , x , GLC, x , x , 2012)
-EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , x , GLC, x , x , 2012)
-EXT(APPLE_object_purgeable , APPLE_object_purgeable , GLL, GLC, x , x , 2006)
-EXT(APPLE_packed_pixels , dummy_true , GLL, x , x , x , 2002)
-EXT(APPLE_texture_max_level , dummy_true , x , x , ES1, ES2, 2009)
-EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002)
-EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
-EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
-EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
-EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
-EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
-EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
-EXT(ATI_texture_float , ARB_texture_float , GLL, GLC, x , x , 2002)
-EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006)
-EXT(IBM_multimode_draw_arrays , dummy_true , GLL, GLC, x , x , 1998)
-EXT(IBM_rasterpos_clip , dummy_true , GLL, x , x , x , 1996)
-EXT(IBM_texture_mirrored_repeat , dummy_true , GLL, x , x , x , 1998)
-EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
-EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
-EXT(MESA_pack_invert , MESA_pack_invert , GLL, GLC, x , x , 2002)
-EXT(MESA_texture_signed_rgba , EXT_texture_snorm , GLL, GLC, x , x , 2009)
-EXT(MESA_window_pos , dummy_true , GLL, x , x , x , 2000)
-EXT(MESA_ycbcr_texture , MESA_ycbcr_texture , GLL, GLC, x , x , 2002)
-EXT(NV_blend_square , dummy_true , GLL, x , x , x , 1999)
-EXT(NV_conditional_render , NV_conditional_render , GLL, GLC, x , x , 2008)
-EXT(NV_depth_clamp , ARB_depth_clamp , GLL, GLC, x , x , 2001)
-EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011)
-EXT(NV_fbo_color_attachments , dummy_true , x , x , x , ES2, 2010)
-EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001)
-EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005)
-EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999)
-EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000)
-EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001)
-EXT(NV_primitive_restart , NV_primitive_restart , GLL, x , x , x , 2002)
-EXT(NV_read_buffer , dummy_true , x , x , x , ES2, 2011)
-EXT(NV_read_depth , dummy_true , x , x , x , ES2, 2011)
-EXT(NV_read_depth_stencil , dummy_true , x , x , x , ES2, 2011)
-EXT(NV_read_stencil , dummy_true , x , x , x , ES2, 2011)
-EXT(NV_texgen_reflection , dummy_true , GLL, x , x , x , 1999)
-EXT(NV_texture_barrier , NV_texture_barrier , GLL, GLC, x , x , 2009)
-EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
-EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
-EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
EXT(S3_s3tc , ANGLE_texture_compression_dxt , GLL, GLC, x , x , 1999)
+
EXT(SGIS_generate_mipmap , dummy_true , GLL, x , x , x , 1997)
EXT(SGIS_texture_border_clamp , ARB_texture_border_clamp , GLL, x , x , x , 1997)
EXT(SGIS_texture_edge_clamp , dummy_true , GLL, x , x , x , 1997)
EXT(SGIS_texture_lod , dummy_true , GLL, x , x , x , 1997)
+
EXT(SUN_multi_draw_arrays , dummy_true , GLL, x , x , x , 1999)
#undef GLL
#undef GLC
diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
index 45f343d61c8..1ad939cfde6 100644
--- a/src/mesa/main/fog.c
+++ b/src/mesa/main/fog.c
@@ -190,7 +190,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
}
if (ctx->Driver.Fogfv) {
- (*ctx->Driver.Fogfv)( ctx, pname, params );
+ ctx->Driver.Fogfv( ctx, pname, params );
}
return;
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index fbc7b8f8602..9b22b91ac1b 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -330,6 +330,9 @@ descriptor=[
# GL_KHR_context_flush_control
[ "CONTEXT_RELEASE_BEHAVIOR", "CONTEXT_ENUM(Const.ContextReleaseBehavior), NO_EXTRA" ],
+
+# blend_func_extended
+ [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
]},
# GLES3 is not a typo.
@@ -801,7 +804,6 @@ descriptor=[
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
- [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
# GL_ARB_uniform_buffer_object
[ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 9873fdbf1a4..87c5a3a194f 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -121,7 +121,7 @@ _mesa_GetString( GLenum name )
assert(ctx->Driver.GetString);
{
/* Give the driver the chance to handle this query */
- const GLubyte *str = (*ctx->Driver.GetString)(ctx, name);
+ const GLubyte *str = ctx->Driver.GetString(ctx, name);
if (str)
return str;
}
@@ -203,12 +203,18 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
{
GET_CURRENT_CONTEXT(ctx);
const GLuint clientUnit = ctx->Array.ActiveTexture;
+ const char *callerstr;
+
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glGetPointerv";
+ else
+ callerstr = "glGetPointervKHR";
if (!params)
return;
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname));
+ _mesa_debug(ctx, "%s %s\n", callerstr, _mesa_enum_to_string(pname));
switch (pname) {
case GL_VERTEX_ARRAY_POINTER:
@@ -268,10 +274,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
break;
case GL_DEBUG_CALLBACK_FUNCTION_ARB:
case GL_DEBUG_CALLBACK_USER_PARAM_ARB:
- if (!_mesa_is_desktop_gl(ctx))
- goto invalid_pname;
- else
- *params = _mesa_get_debug_state_ptr(ctx, pname);
+ *params = _mesa_get_debug_state_ptr(ctx, pname);
break;
default:
goto invalid_pname;
@@ -280,7 +283,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
return;
invalid_pname:
- _mesa_error( ctx, GL_INVALID_ENUM, "glGetPointerv" );
+ _mesa_error( ctx, GL_INVALID_ENUM, "%s", callerstr);
return;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 95cbba4ed57..4a849fb090d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2193,6 +2193,7 @@ struct gl_ati_fragment_shader_state
struct gl_subroutine_function
{
char *name;
+ int index;
int num_compat_types;
const struct glsl_type **types;
};
@@ -3766,6 +3767,7 @@ struct gl_extensions
GLboolean EXT_polygon_offset_clamp;
GLboolean EXT_provoking_vertex;
GLboolean EXT_shader_integer_mix;
+ GLboolean EXT_shader_samples_identical;
GLboolean EXT_stencil_two_side;
GLboolean EXT_texture_array;
GLboolean EXT_texture_compression_latc;
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 1019f893ba8..41f370ce485 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -243,13 +243,19 @@ _mesa_ObjectLabel(GLenum identifier, GLuint name, GLsizei length,
const GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
+ const char *callerstr;
char **labelPtr;
- labelPtr = get_label_pointer(ctx, identifier, name, "glObjectLabel");
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glObjectLabel";
+ else
+ callerstr = "glObjectLabelKHR";
+
+ labelPtr = get_label_pointer(ctx, identifier, name, callerstr);
if (!labelPtr)
return;
- set_label(ctx, labelPtr, label, length, "glObjectLabel");
+ set_label(ctx, labelPtr, label, length, callerstr);
}
void GLAPIENTRY
@@ -257,15 +263,21 @@ _mesa_GetObjectLabel(GLenum identifier, GLuint name, GLsizei bufSize,
GLsizei *length, GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
+ const char *callerstr;
char **labelPtr;
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glGetObjectLabel";
+ else
+ callerstr = "glGetObjectLabelKHR";
+
if (bufSize < 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectLabel(bufSize = %d)",
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr,
bufSize);
return;
}
- labelPtr = get_label_pointer(ctx, identifier, name, "glGetObjectLabel");
+ labelPtr = get_label_pointer(ctx, identifier, name, callerstr);
if (!labelPtr)
return;
@@ -276,17 +288,24 @@ void GLAPIENTRY
_mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- char **labelPtr;
struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ const char *callerstr;
+ char **labelPtr;
+
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glObjectPtrLabel";
+ else
+ callerstr = "glObjectPtrLabelKHR";
if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glObjectPtrLabel (not a valid sync object)");
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
+ callerstr);
return;
}
labelPtr = &syncObj->Label;
- set_label(ctx, labelPtr, label, length, "glObjectPtrLabel");
+ set_label(ctx, labelPtr, label, length, callerstr);
}
void GLAPIENTRY
@@ -294,17 +313,24 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- char **labelPtr;
struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ const char *callerstr;
+ char **labelPtr;
+
+ if (_mesa_is_desktop_gl(ctx))
+ callerstr = "glGetObjectPtrLabel";
+ else
+ callerstr = "glGetObjectPtrLabelKHR";
if (bufSize < 0) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel(bufSize = %d)",
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr,
bufSize);
return;
}
if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel (not a valid sync object)");
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
+ callerstr);
return;
}
diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c
index 863e3c1af32..c2f2b6399cb 100644
--- a/src/mesa/main/points.c
+++ b/src/mesa/main/points.c
@@ -209,7 +209,7 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params)
}
if (ctx->Driver.PointParameterfv)
- (*ctx->Driver.PointParameterfv)(ctx, pname, params);
+ ctx->Driver.PointParameterfv(ctx, pname, params);
}
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 58ba04153e6..79a91b5b6bd 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -661,6 +661,13 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
switch (res->Type) {
case GL_ATOMIC_COUNTER_BUFFER:
return RESOURCE_ATC(res) - shProg->AtomicBuffers;
+ case GL_VERTEX_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ return RESOURCE_SUB(res)->index;
case GL_UNIFORM_BLOCK:
case GL_SHADER_STORAGE_BLOCK:
case GL_TRANSFORM_FEEDBACK_VARYING:
@@ -1413,9 +1420,19 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
for (idx = prev + 1; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
if (shProg[idx]) {
- if (!validate_io(shProg[prev]->_LinkedShaders[prev],
- shProg[idx]->_LinkedShaders[idx]))
- return false;
+ /* Since we now only validate precision, we can skip this step for
+ * desktop GLSL shaders, there precision qualifier is ignored.
+ *
+ * From OpenGL 4.50 Shading Language spec, section 4.7:
+ * "For the purposes of determining if an output from one shader
+ * stage matches an input of the next stage, the precision
+ * qualifier need not match."
+ */
+ if (shProg[prev]->IsES || shProg[idx]->IsES) {
+ if (!validate_io(shProg[prev]->_LinkedShaders[prev],
+ shProg[idx]->_LinkedShaders[idx]))
+ return false;
+ }
prev = idx;
}
}
diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am
index bd7ab7365c0..d6977e20e85 100644
--- a/src/mesa/main/tests/Makefile.am
+++ b/src/mesa/main/tests/Makefile.am
@@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI
main_test_SOURCES += \
dispatch_sanity.cpp \
mesa_formats.cpp \
+ mesa_extensions.cpp \
program_state_string.cpp
main_test_LDADD += \
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index abe0f432572..97f81f932f6 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2421,6 +2421,11 @@ const struct function gles3_functions_possible[] = {
{ "glProgramUniform4uiEXT", 30, -1 },
{ "glProgramUniform4uivEXT", 30, -1 },
+ /* GL_EXT_blend_func_extended */
+ { "glBindFragDataLocationIndexedEXT", 30, -1 },
+ { "glGetFragDataIndexEXT", 30, -1 },
+ { "glBindFragDataLocationEXT", 30, -1 },
+
{ NULL, 0, -1 }
};
@@ -2509,5 +2514,8 @@ const struct function gles31_functions_possible[] = {
/* GL_EXT_buffer_storage */
{ "glBufferStorageEXT", 31, -1 },
+ /* GL_EXT_blend_func_extended */
+ { "glGetProgramResourceLocationIndexEXT", 31, -1 },
+
{ NULL, 0, -1 },
};
diff --git a/src/mesa/main/tests/mesa_extensions.cpp b/src/mesa/main/tests/mesa_extensions.cpp
new file mode 100644
index 00000000000..0c7addd4282
--- /dev/null
+++ b/src/mesa/main/tests/mesa_extensions.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \name mesa_extensions.cpp
+ *
+ * Verify that the extensions table is sorted.
+ */
+
+#include <gtest/gtest.h>
+#include "util/macros.h"
+
+/**
+ * Debug/test: verify the extension table is alphabetically sorted.
+ */
+TEST(MesaExtensionsTest, AlphabeticallySorted)
+{
+ const char *ext_names[] = {
+ #define EXT(name_str, ...) #name_str,
+ #include "main/extensions_table.h"
+ #undef EXT
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(ext_names) - 1; ++i) {
+ const char *current_str = ext_names[i];
+ const char *next_str = ext_names[i+1];
+
+ /* We expect the extension table to be alphabetically sorted */
+ ASSERT_LT(strcmp(current_str, next_str), 0);
+ }
+}
diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c
index 091922161c5..93c680650bb 100644
--- a/src/mesa/main/texenv.c
+++ b/src/mesa/main/texenv.c
@@ -495,7 +495,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param )
/* Tell device driver about the new texture environment */
if (ctx->Driver.TexEnv) {
- (*ctx->Driver.TexEnv)( ctx, target, pname, param );
+ ctx->Driver.TexEnv(ctx, target, pname, param);
}
}
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index d9453e3a281..ac7599f9fd4 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1333,21 +1333,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
break;
case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_CUBE_MAP_ARRAY:
- /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec:
- *
- * "The ETC2/EAC texture compression algorithm supports only
- * two-dimensional images. If internalformat is an ETC2/EAC format,
- * glCompressedTexImage3D will generate an INVALID_OPERATION error if
- * target is not TEXTURE_2D_ARRAY."
- *
- * This should also be applicable for glTexStorage3D(). Other available
- * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY.
- */
- if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx))
- return write_error(error, GL_INVALID_OPERATION);
-
- target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array;
-
/* From the KHR_texture_compression_astc_hdr spec:
*
* Add a second new column "3D Tex." which is empty for all non-ASTC
@@ -1368,16 +1353,24 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
* 8.19 is *not* checked'
*
* The instances of <internalformat> above should say <target>.
+ *
+ * ETC2/EAC formats are the only alternative in GLES and thus such errors
+ * have already been handled by normal ETC2/EAC behavior.
*/
- /* Throw an INVALID_OPERATION error if the target is
- * TEXTURE_CUBE_MAP_ARRAY and the format is not ASTC.
+ /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec:
+ *
+ * "The ETC2/EAC texture compression algorithm supports only
+ * two-dimensional images. If internalformat is an ETC2/EAC format,
+ * glCompressedTexImage3D will generate an INVALID_OPERATION error if
+ * target is not TEXTURE_2D_ARRAY."
+ *
+ * This should also be applicable for glTexStorage3D(). Other available
+ * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY.
*/
- if (target_can_be_compresed &&
- ctx->Extensions.KHR_texture_compression_astc_ldr &&
- layout != MESA_FORMAT_LAYOUT_ASTC)
- return write_error(error, GL_INVALID_OPERATION);
-
+ if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx))
+ return write_error(error, GL_INVALID_OPERATION);
+ target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array;
break;
case GL_TEXTURE_3D:
switch (layout) {
@@ -1401,12 +1394,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
return write_error(error, GL_INVALID_OPERATION);
break;
default:
- /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and
- * the format is not ASTC.
- * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info.
- */
- if (ctx->Extensions.KHR_texture_compression_astc_ldr)
- return write_error(error, GL_INVALID_OPERATION);
break;
}
default:
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1099d79d834..c5d8c483429 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1920,6 +1920,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
case ir_query_levels:
assert(!"Unexpected ir_query_levels opcode");
break;
+ case ir_samples_identical:
+ unreachable("Unexpected ir_samples_identical opcode");
case ir_texture_samples:
unreachable("Unexpected ir_texture_samples opcode");
}
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be397ae..8fdf0e8497f 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,69 +36,24 @@
#include "pipe/p_screen.h"
#include "util/u_memory.h"
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
- int num_queries;
- int type = -1;
- int i;
-
- num_queries = screen->get_driver_query_info(screen, 0, NULL);
- if (!num_queries)
- return type;
-
- for (i = 0; i < num_queries; i++) {
- struct pipe_driver_query_info info;
-
- if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
- if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
- }
- }
- return type;
-}
-
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
- int num_groups, gid;
-
- num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
- for (gid = 0; gid < num_groups; gid++) {
- struct pipe_driver_query_group_info group_info;
-
- if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
- if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
- }
- return false;
-}
-
static bool
init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
{
+ struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
- struct pipe_screen *screen = st_context(ctx)->pipe->screen;
- struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct pipe_context *pipe = st->pipe;
+ unsigned *batch = NULL;
+ unsigned num_active_counters = 0;
+ unsigned max_batch_counters = 0;
+ unsigned num_batch_counters = 0;
int gid, cid;
- st_flush_bitmap_cache(st_context(ctx));
+ st_flush_bitmap_cache(st);
- /* Create a query for each active counter. */
+ /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+ const struct st_perf_monitor_group *stg = &st->perfmon[gid];
if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
/* Maximum number of counters reached. Cannot start the session. */
@@ -109,53 +64,96 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
return false;
}
- for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
- struct st_perf_counter_object *cntr;
- int query_type;
+ num_active_counters += m->ActiveGroups[gid];
+ if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
+ }
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
- continue;
+ if (!num_active_counters)
+ return true;
+
+ stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+ if (!stm->active_counters)
+ return false;
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
+ if (max_batch_counters) {
+ batch = CALLOC(max_batch_counters, sizeof(*batch));
+ if (!batch)
+ return false;
+ }
+
+ /* Create a query for each active counter. */
+ for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+ const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+ const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+ BITSET_WORD tmp;
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
- return false;
+ BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
+ struct st_perf_counter_object *cntr =
+ &stm->active_counters[stm->num_active_counters];
- cntr->query = pipe->create_query(pipe, query_type, 0);
cntr->id = cid;
cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+ cntr->batch_index = num_batch_counters;
+ batch[num_batch_counters++] = stc->query_type;
+ } else {
+ cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+ if (!cntr->query)
+ goto fail;
+ }
+ ++stm->num_active_counters;
}
}
+
+ /* Create the batch query. */
+ if (num_batch_counters) {
+ stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+ batch);
+ stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0]));
+ if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+ }
+
+ FREE(batch);
return true;
+
+fail:
+ FREE(batch);
+ return false;
}
static void
reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
{
- struct st_perf_counter_object *cntr, *tmp;
+ unsigned i;
- LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
- if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
- list_del(&cntr->list);
- free(cntr);
+ for (i = 0; i < stm->num_active_counters; ++i) {
+ struct pipe_query *query = stm->active_counters[i].query;
+ if (query)
+ pipe->destroy_query(pipe, query);
}
+ FREE(stm->active_counters);
+ stm->active_counters = NULL;
+ stm->num_active_counters = 0;
+
+ if (stm->batch_query) {
+ pipe->destroy_query(pipe, stm->batch_query);
+ stm->batch_query = NULL;
+ }
+ FREE(stm->batch_result);
+ stm->batch_result = NULL;
}
static struct gl_perf_monitor_object *
st_NewPerfMonitor(struct gl_context *ctx)
{
struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object);
- if (stq) {
- list_inithead(&stq->active_counters);
+ if (stq)
return &stq->base;
- }
return NULL;
}
@@ -174,9 +172,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
{
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_perf_counter_object *cntr;
+ unsigned i;
- if (LIST_IS_EMPTY(&stm->active_counters)) {
+ if (!stm->num_active_counters) {
/* Create a query for each active counter before starting
* a new monitoring session. */
if (!init_perf_monitor(ctx, m))
@@ -184,10 +182,15 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
}
/* Start the query for each active counter. */
- LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
- if (!pipe->begin_query(pipe, cntr->query))
+ for (i = 0; i < stm->num_active_counters; ++i) {
+ struct pipe_query *query = stm->active_counters[i].query;
+ if (query && !pipe->begin_query(pipe, query))
goto fail;
}
+
+ if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+ goto fail;
+
return true;
fail:
@@ -201,11 +204,17 @@ st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
{
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_perf_counter_object *cntr;
+ unsigned i;
/* Stop the query for each active counter. */
- LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list)
- pipe->end_query(pipe, cntr->query);
+ for (i = 0; i < stm->num_active_counters; ++i) {
+ struct pipe_query *query = stm->active_counters[i].query;
+ if (query)
+ pipe->end_query(pipe, query);
+ }
+
+ if (stm->batch_query)
+ pipe->end_query(pipe, stm->batch_query);
}
static void
@@ -229,20 +238,26 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
{
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_perf_counter_object *cntr;
+ unsigned i;
- if (LIST_IS_EMPTY(&stm->active_counters))
+ if (!stm->num_active_counters)
return false;
/* The result of a monitoring session is only available if the query of
* each active counter is idle. */
- LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
+ for (i = 0; i < stm->num_active_counters; ++i) {
+ struct pipe_query *query = stm->active_counters[i].query;
union pipe_query_result result;
- if (!pipe->get_query_result(pipe, cntr->query, FALSE, &result)) {
+ if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) {
/* The query is busy. */
return false;
}
}
+
+ if (stm->batch_query &&
+ !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result))
+ return false;
+
return true;
}
@@ -255,7 +270,7 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
{
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_perf_counter_object *cntr;
+ unsigned i;
/* Copy data to the supplied array (data).
*
@@ -263,9 +278,15 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
* active counter. The API allows counters to appear in any order.
*/
GLsizei offset = 0;
+ bool have_batch_query = false;
+
+ if (stm->batch_query)
+ have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE,
+ stm->batch_result);
/* Read query results for each active counter. */
- LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
+ for (i = 0; i < stm->num_active_counters; ++i) {
+ struct st_perf_counter_object *cntr = &stm->active_counters[i];
union pipe_query_result result = { 0 };
int gid, cid;
GLenum type;
@@ -274,8 +295,14 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
gid = cntr->group_id;
type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type;
- if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
- continue;
+ if (cntr->query) {
+ if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
+ continue;
+ } else {
+ if (!have_batch_query)
+ continue;
+ result.batch[0] = stm->batch_result->batch[cntr->batch_index];
+ }
data[offset++] = gid;
data[offset++] = cid;
@@ -307,18 +334,13 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+ struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
return false;
- if (!has_gpu_counters(screen)) {
- /* According to the spec, GL_AMD_performance_monitor must only
- * expose GPU counters. */
- return false;
- }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -331,29 +353,37 @@ st_init_perfmon(struct st_context *st)
if (!groups)
return false;
+ stgroups = CALLOC(num_groups, sizeof(*stgroups));
+ if (!stgroups)
+ goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
+ struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups];
struct pipe_driver_query_group_info group_info;
struct gl_perf_monitor_counter *counters = NULL;
+ struct st_perf_monitor_counter *stcounters = NULL;
if (!screen->get_driver_query_group_info(screen, gid, &group_info))
continue;
- if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
g->Name = group_info.name;
g->MaxActiveCounters = group_info.max_active_queries;
- g->NumCounters = 0;
- g->Counters = NULL;
if (group_info.num_queries)
counters = CALLOC(group_info.num_queries, sizeof(*counters));
if (!counters)
goto fail;
+ g->Counters = counters;
+
+ stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+ if (!stcounters)
+ goto fail;
+ stg->counters = stcounters;
for (cid = 0; cid < num_counters; cid++) {
struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
struct pipe_driver_query_info info;
if (!screen->get_driver_query_info(screen, cid, &info))
@@ -364,6 +394,9 @@ st_init_perfmon(struct st_context *st)
c->Name = info.name;
switch (info.type) {
case PIPE_DRIVER_QUERY_TYPE_UINT64:
+ case PIPE_DRIVER_QUERY_TYPE_BYTES:
+ case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+ case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
@@ -386,18 +419,28 @@ st_init_perfmon(struct st_context *st)
default:
unreachable("Invalid driver query type!");
}
+
+ stc->query_type = info.query_type;
+ stc->flags = info.flags;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH)
+ stg->has_batch = true;
+
g->NumCounters++;
}
- g->Counters = counters;
perfmon->NumGroups++;
}
perfmon->Groups = groups;
+ st->perfmon = stgroups;
return true;
fail:
- for (gid = 0; gid < num_groups; gid++)
+ for (gid = 0; gid < num_groups; gid++) {
+ FREE(stgroups[gid].counters);
FREE((void *)groups[gid].Counters);
+ }
+ FREE(stgroups);
+fail_only_groups:
FREE(groups);
return false;
}
@@ -408,8 +451,11 @@ st_destroy_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
int gid;
- for (gid = 0; gid < perfmon->NumGroups; gid++)
+ for (gid = 0; gid < perfmon->NumGroups; gid++) {
+ FREE(st->perfmon[gid].counters);
FREE((void *)perfmon->Groups[gid].Counters);
+ }
+ FREE(st->perfmon);
FREE((void *)perfmon->Groups);
}
diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h
index 0b195de47fe..29732866bf8 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.h
+++ b/src/mesa/state_tracker/st_cb_perfmon.h
@@ -26,21 +26,41 @@
#include "util/list.h"
+struct st_perf_counter_object
+{
+ struct pipe_query *query;
+ int id;
+ int group_id;
+ unsigned batch_index;
+};
+
/**
* Subclass of gl_perf_monitor_object
*/
struct st_perf_monitor_object
{
struct gl_perf_monitor_object base;
- struct list_head active_counters;
+ unsigned num_active_counters;
+ struct st_perf_counter_object *active_counters;
+
+ struct pipe_query *batch_query;
+ union pipe_query_result *batch_result;
};
-struct st_perf_counter_object
+/**
+ * Extra data per counter, supplementing gl_perf_monitor_counter with
+ * driver-specific information.
+ */
+struct st_perf_monitor_counter
{
- struct list_head list;
- struct pipe_query *query;
- int id;
- int group_id;
+ unsigned query_type;
+ unsigned flags;
+};
+
+struct st_perf_monitor_group
+{
+ struct st_perf_monitor_counter *counters;
+ bool has_batch;
};
/**
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index c243f5cd966..60a9a4bb0d5 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -46,6 +46,7 @@ struct draw_stage;
struct gen_mipmap_state;
struct st_context;
struct st_fragment_program;
+struct st_perf_monitor_group;
struct u_upload_mgr;
@@ -217,6 +218,8 @@ struct st_context
int32_t read_stamp;
struct st_config_options options;
+
+ struct st_perf_monitor_group *perfmon;
};
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99e96e1f3ae..a2418e28a91 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -343,7 +343,7 @@ struct st_extension_cap_mapping {
struct st_extension_format_mapping {
int extension_offset[2];
- enum pipe_format format[8];
+ enum pipe_format format[32];
/* If TRUE, at least one format must be supported for the extensions to be
* advertised. If FALSE, all the formats must be supported. */
@@ -569,6 +569,36 @@ void st_init_extensions(struct pipe_screen *screen,
PIPE_FORMAT_BPTC_RGB_FLOAT,
PIPE_FORMAT_BPTC_RGB_UFLOAT } },
+ { { o(KHR_texture_compression_astc_ldr) },
+ { PIPE_FORMAT_ASTC_4x4,
+ PIPE_FORMAT_ASTC_5x4,
+ PIPE_FORMAT_ASTC_5x5,
+ PIPE_FORMAT_ASTC_6x5,
+ PIPE_FORMAT_ASTC_6x6,
+ PIPE_FORMAT_ASTC_8x5,
+ PIPE_FORMAT_ASTC_8x6,
+ PIPE_FORMAT_ASTC_8x8,
+ PIPE_FORMAT_ASTC_10x5,
+ PIPE_FORMAT_ASTC_10x6,
+ PIPE_FORMAT_ASTC_10x8,
+ PIPE_FORMAT_ASTC_10x10,
+ PIPE_FORMAT_ASTC_12x10,
+ PIPE_FORMAT_ASTC_12x12,
+ PIPE_FORMAT_ASTC_4x4_SRGB,
+ PIPE_FORMAT_ASTC_5x4_SRGB,
+ PIPE_FORMAT_ASTC_5x5_SRGB,
+ PIPE_FORMAT_ASTC_6x5_SRGB,
+ PIPE_FORMAT_ASTC_6x6_SRGB,
+ PIPE_FORMAT_ASTC_8x5_SRGB,
+ PIPE_FORMAT_ASTC_8x6_SRGB,
+ PIPE_FORMAT_ASTC_8x8_SRGB,
+ PIPE_FORMAT_ASTC_10x5_SRGB,
+ PIPE_FORMAT_ASTC_10x6_SRGB,
+ PIPE_FORMAT_ASTC_10x8_SRGB,
+ PIPE_FORMAT_ASTC_10x10_SRGB,
+ PIPE_FORMAT_ASTC_12x10_SRGB,
+ PIPE_FORMAT_ASTC_12x12_SRGB } },
+
{ { o(EXT_texture_shared_exponent) },
{ PIPE_FORMAT_R9G9B9E5_FLOAT } },
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 144b7d6f659..2b92bade440 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -465,6 +465,64 @@ st_mesa_format_to_pipe_format(struct st_context *st, mesa_format mesaFormat)
case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGB8A1 : PIPE_FORMAT_B8G8R8A8_SRGB;
+ case MESA_FORMAT_RGBA_ASTC_4x4:
+ return PIPE_FORMAT_ASTC_4x4;
+ case MESA_FORMAT_RGBA_ASTC_5x4:
+ return PIPE_FORMAT_ASTC_5x4;
+ case MESA_FORMAT_RGBA_ASTC_5x5:
+ return PIPE_FORMAT_ASTC_5x5;
+ case MESA_FORMAT_RGBA_ASTC_6x5:
+ return PIPE_FORMAT_ASTC_6x5;
+ case MESA_FORMAT_RGBA_ASTC_6x6:
+ return PIPE_FORMAT_ASTC_6x6;
+ case MESA_FORMAT_RGBA_ASTC_8x5:
+ return PIPE_FORMAT_ASTC_8x5;
+ case MESA_FORMAT_RGBA_ASTC_8x6:
+ return PIPE_FORMAT_ASTC_8x6;
+ case MESA_FORMAT_RGBA_ASTC_8x8:
+ return PIPE_FORMAT_ASTC_8x8;
+ case MESA_FORMAT_RGBA_ASTC_10x5:
+ return PIPE_FORMAT_ASTC_10x5;
+ case MESA_FORMAT_RGBA_ASTC_10x6:
+ return PIPE_FORMAT_ASTC_10x6;
+ case MESA_FORMAT_RGBA_ASTC_10x8:
+ return PIPE_FORMAT_ASTC_10x8;
+ case MESA_FORMAT_RGBA_ASTC_10x10:
+ return PIPE_FORMAT_ASTC_10x10;
+ case MESA_FORMAT_RGBA_ASTC_12x10:
+ return PIPE_FORMAT_ASTC_12x10;
+ case MESA_FORMAT_RGBA_ASTC_12x12:
+ return PIPE_FORMAT_ASTC_12x12;
+
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
+ return PIPE_FORMAT_ASTC_4x4_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
+ return PIPE_FORMAT_ASTC_5x4_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
+ return PIPE_FORMAT_ASTC_5x5_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
+ return PIPE_FORMAT_ASTC_6x5_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
+ return PIPE_FORMAT_ASTC_6x6_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
+ return PIPE_FORMAT_ASTC_8x5_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
+ return PIPE_FORMAT_ASTC_8x6_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
+ return PIPE_FORMAT_ASTC_8x8_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
+ return PIPE_FORMAT_ASTC_10x5_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
+ return PIPE_FORMAT_ASTC_10x6_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
+ return PIPE_FORMAT_ASTC_10x8_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
+ return PIPE_FORMAT_ASTC_10x10_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
+ return PIPE_FORMAT_ASTC_12x10_SRGB;
+ case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
+ return PIPE_FORMAT_ASTC_12x12_SRGB;
+
default:
return PIPE_FORMAT_NONE;
}
@@ -883,6 +941,64 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
case PIPE_FORMAT_ETC2_RG11_SNORM:
return MESA_FORMAT_ETC2_SIGNED_RG11_EAC;
+ case PIPE_FORMAT_ASTC_4x4:
+ return MESA_FORMAT_RGBA_ASTC_4x4;
+ case PIPE_FORMAT_ASTC_5x4:
+ return MESA_FORMAT_RGBA_ASTC_5x4;
+ case PIPE_FORMAT_ASTC_5x5:
+ return MESA_FORMAT_RGBA_ASTC_5x5;
+ case PIPE_FORMAT_ASTC_6x5:
+ return MESA_FORMAT_RGBA_ASTC_6x5;
+ case PIPE_FORMAT_ASTC_6x6:
+ return MESA_FORMAT_RGBA_ASTC_6x6;
+ case PIPE_FORMAT_ASTC_8x5:
+ return MESA_FORMAT_RGBA_ASTC_8x5;
+ case PIPE_FORMAT_ASTC_8x6:
+ return MESA_FORMAT_RGBA_ASTC_8x6;
+ case PIPE_FORMAT_ASTC_8x8:
+ return MESA_FORMAT_RGBA_ASTC_8x8;
+ case PIPE_FORMAT_ASTC_10x5:
+ return MESA_FORMAT_RGBA_ASTC_10x5;
+ case PIPE_FORMAT_ASTC_10x6:
+ return MESA_FORMAT_RGBA_ASTC_10x6;
+ case PIPE_FORMAT_ASTC_10x8:
+ return MESA_FORMAT_RGBA_ASTC_10x8;
+ case PIPE_FORMAT_ASTC_10x10:
+ return MESA_FORMAT_RGBA_ASTC_10x10;
+ case PIPE_FORMAT_ASTC_12x10:
+ return MESA_FORMAT_RGBA_ASTC_12x10;
+ case PIPE_FORMAT_ASTC_12x12:
+ return MESA_FORMAT_RGBA_ASTC_12x12;
+
+ case PIPE_FORMAT_ASTC_4x4_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4;
+ case PIPE_FORMAT_ASTC_5x4_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4;
+ case PIPE_FORMAT_ASTC_5x5_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5;
+ case PIPE_FORMAT_ASTC_6x5_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5;
+ case PIPE_FORMAT_ASTC_6x6_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6;
+ case PIPE_FORMAT_ASTC_8x5_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5;
+ case PIPE_FORMAT_ASTC_8x6_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6;
+ case PIPE_FORMAT_ASTC_8x8_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8;
+ case PIPE_FORMAT_ASTC_10x5_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5;
+ case PIPE_FORMAT_ASTC_10x6_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6;
+ case PIPE_FORMAT_ASTC_10x8_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8;
+ case PIPE_FORMAT_ASTC_10x10_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10;
+ case PIPE_FORMAT_ASTC_12x10_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10;
+ case PIPE_FORMAT_ASTC_12x12_SRGB:
+ return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12;
+
default:
return MESA_FORMAT_NONE;
}
@@ -1386,6 +1502,121 @@ static const struct format_mapping format_map[] = {
{ PIPE_FORMAT_BPTC_RGB_UFLOAT, 0 },
},
+ /* ASTC */
+ {
+ { GL_COMPRESSED_RGBA_ASTC_4x4_KHR, 0 },
+ { PIPE_FORMAT_ASTC_4x4, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 0 },
+ { PIPE_FORMAT_ASTC_5x4, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_5x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_5x5, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_6x5, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_6x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_6x6, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x5, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_8x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x6, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_8x8_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x8, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_10x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x5, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_10x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x6, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_10x8_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x8, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_10x10_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x10, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_12x10_KHR, 0 },
+ { PIPE_FORMAT_ASTC_12x10, 0},
+ },
+ {
+ { GL_COMPRESSED_RGBA_ASTC_12x12_KHR, 0 },
+ { PIPE_FORMAT_ASTC_12x12, 0},
+ },
+
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, 0 },
+ { PIPE_FORMAT_ASTC_4x4_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, 0 },
+ { PIPE_FORMAT_ASTC_5x4_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_5x5_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_6x5_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_6x6_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x5_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x6_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, 0 },
+ { PIPE_FORMAT_ASTC_8x8_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x5_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x6_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x8_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 0 },
+ { PIPE_FORMAT_ASTC_10x10_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, 0 },
+ { PIPE_FORMAT_ASTC_12x10_SRGB, 0},
+ },
+ {
+ { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 0 },
+ { PIPE_FORMAT_ASTC_12x12_SRGB, 0},
+ },
+
/* signed/unsigned integer formats.
*/
{
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3ad1afdecda..40c77258de7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3236,6 +3236,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
case ir_texture_samples:
opcode = TGSI_OPCODE_TXQS;
break;
+ case ir_samples_identical:
+ unreachable("Unexpected ir_samples_identical opcode");
}
if (ir->projector) {
diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c
index 1193d1e7a5d..3d9e0705626 100644
--- a/src/vulkan/anv_pipeline.c
+++ b/src/vulkan/anv_pipeline.c
@@ -125,17 +125,7 @@ bool
anv_is_scalar_shader_stage(const struct brw_compiler *compiler,
VkShaderStage stage)
{
- switch (stage) {
- case VK_SHADER_STAGE_VERTEX:
- return compiler->scalar_vs;
- case VK_SHADER_STAGE_GEOMETRY:
- return false;
- case VK_SHADER_STAGE_FRAGMENT:
- case VK_SHADER_STAGE_COMPUTE:
- return true;
- default:
- unreachable("Unsupported shader stage");
- }
+ return compiler->scalar_stage[vk_shader_stage_to_mesa_stage[stage]];
}
/* Eventually, this will become part of anv_CreateShader. Unfortunately,
@@ -187,8 +177,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
}
assert(entrypoint != NULL);
- brw_preprocess_nir(nir, &device->info,
- anv_is_scalar_shader_stage(compiler, vk_stage));
+ nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]);
nir_shader_gather_info(nir, entrypoint);
@@ -411,7 +400,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
prog_data->binding_table.image_start = bias;
/* Finish the optimization and compilation process */
- brw_postprocess_nir(nir, &pipeline->device->info,
+ nir = brw_lower_nir(nir, &pipeline->device->info, NULL,
anv_is_scalar_shader_stage(compiler, stage));
/* nir_lower_io will only handle the push constants; we need to set this